2 * Copyright (c) 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
26 * Sreerenj Balachandran <sreerenj.balachandran@intel.com>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
55 #define MAX_URB_SIZE 4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT 1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
58 #define GPE_RESOURCE_ALIGNMENT 4 /* 4 means 16 = 1 << 4) */
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
62 OUT_BCS_RELOC64(batch, \
64 I915_GEM_DOMAIN_INSTRUCTION, \
65 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
68 OUT_BCS_BATCH(batch, 0); \
69 OUT_BCS_BATCH(batch, 0); \
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
74 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
75 OUT_BCS_BATCH(batch, attr); \
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE 64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE 128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE 16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE 48
84 #define FEI_AVC_QP_BUFFER_SIZE 1
85 #define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
87 #define SCALE_CUR_PIC 1
88 #define SCALE_PAST_REF_PIC 2
89 #define SCALE_FUTURE_REF_PIC 3
91 static const uint32_t qm_flat[16] = {
92 0x10101010, 0x10101010, 0x10101010, 0x10101010,
93 0x10101010, 0x10101010, 0x10101010, 0x10101010,
94 0x10101010, 0x10101010, 0x10101010, 0x10101010,
95 0x10101010, 0x10101010, 0x10101010, 0x10101010
98 static const uint32_t fqm_flat[32] = {
99 0x10001000, 0x10001000, 0x10001000, 0x10001000,
100 0x10001000, 0x10001000, 0x10001000, 0x10001000,
101 0x10001000, 0x10001000, 0x10001000, 0x10001000,
102 0x10001000, 0x10001000, 0x10001000, 0x10001000,
103 0x10001000, 0x10001000, 0x10001000, 0x10001000,
104 0x10001000, 0x10001000, 0x10001000, 0x10001000,
105 0x10001000, 0x10001000, 0x10001000, 0x10001000,
106 0x10001000, 0x10001000, 0x10001000, 0x10001000
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559 struct encode_state *encode_state,
560 struct intel_encoder_context *encoder_context)
562 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
567 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
569 generic_state->brc_need_reset = encoder_context->brc.need_reset;
571 if (generic_state->internal_rate_mode == VA_RC_CBR) {
572 generic_state->min_bit_rate = generic_state->max_bit_rate;
573 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
575 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576 generic_state->target_bit_rate = generic_state->max_bit_rate;
577 generic_state->brc_need_reset = 1;
579 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
583 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585 generic_state->brc_need_reset = 1;
590 if (generic_state->internal_rate_mode != VA_RC_CQP) {
591 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
595 generic_state->frames_per_100s = 30 * 100;
596 generic_state->frame_rate = 30 ;
597 generic_state->frames_per_window_size = 30;
601 if (generic_state->internal_rate_mode != VA_RC_CQP) {
602 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
607 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608 if (generic_state->num_roi > 0) {
609 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
612 for (i = 0; i < generic_state->num_roi; i++) {
613 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
614 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
615 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
616 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
619 generic_state->roi[i].left /= 16;
620 generic_state->roi[i].right /= 16;
621 generic_state->roi[i].top /= 16;
622 generic_state->roi[i].bottom /= 16;
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
631 INTEL_GENERIC_ENC_OPERATION operation,
633 struct i965_kernel *ret_kernel)
635 typedef uint32_t BIN_PTR[4];
638 gen9_avc_encoder_kernel_header *pkh_table;
639 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
642 if (!pvbinary || !ret_kernel)
645 bin_start = (char *)pvbinary;
646 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647 pinvalid_entry = &(pkh_table->static_detection) + 1;
648 next_krnoffset = binary_size;
650 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651 pcurr_header = &pkh_table->ply_dscale_ply;
652 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653 pcurr_header = &pkh_table->ply_2xdscale_ply;
654 } else if (operation == INTEL_GENERIC_ENC_ME) {
655 pcurr_header = &pkh_table->me_p;
656 } else if (operation == INTEL_GENERIC_ENC_BRC) {
657 pcurr_header = &pkh_table->frame_brc_init;
658 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659 pcurr_header = &pkh_table->mbenc_quality_I;
660 } else if (operation == INTEL_GENERIC_ENC_WP) {
661 pcurr_header = &pkh_table->wp;
662 } else if (operation == INTEL_GENERIC_ENC_SFD) {
663 pcurr_header = &pkh_table->static_detection;
668 pcurr_header += krnstate_idx;
669 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
671 pnext_header = (pcurr_header + 1);
672 if (pnext_header < pinvalid_entry) {
673 next_krnoffset = pnext_header->kernel_start_pointer << 6;
675 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
681 intel_avc_fei_get_kernel_header_and_size(
684 INTEL_GENERIC_ENC_OPERATION operation,
686 struct i965_kernel *ret_kernel)
688 typedef uint32_t BIN_PTR[4];
691 gen9_avc_fei_encoder_kernel_header *pkh_table;
692 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
695 if (!pvbinary || !ret_kernel)
698 bin_start = (char *)pvbinary;
699 pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700 pinvalid_entry = &(pkh_table->wp) + 1;
701 next_krnoffset = binary_size;
703 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704 pcurr_header = &pkh_table->ply_dscale_ply;
705 } else if (operation == INTEL_GENERIC_ENC_ME) {
706 pcurr_header = &pkh_table->me_p;
707 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708 pcurr_header = &pkh_table->mbenc_i;
709 } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710 pcurr_header = &pkh_table->preproc;
715 pcurr_header += krnstate_idx;
716 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
718 pnext_header = (pcurr_header + 1);
719 if (pnext_header < pinvalid_entry) {
720 next_krnoffset = pnext_header->kernel_start_pointer << 6;
722 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
728 gen9_free_surfaces_avc(void **data)
730 struct gen9_surface_avc *avc_surface;
737 if (avc_surface->scaled_4x_surface_obj) {
738 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740 avc_surface->scaled_4x_surface_obj = NULL;
743 if (avc_surface->scaled_16x_surface_obj) {
744 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746 avc_surface->scaled_16x_surface_obj = NULL;
749 if (avc_surface->scaled_32x_surface_obj) {
750 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752 avc_surface->scaled_32x_surface_obj = NULL;
755 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
759 /* FEI specific resources */
760 /* since the driver previously taken an extra reference to the drm_bo
761 * in case the buffers were supplied by middleware, there shouldn't
762 * be any memory handling issue */
763 i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764 i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765 i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766 i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
768 dri_bo_unreference(avc_surface->dmv_top);
769 avc_surface->dmv_top = NULL;
770 dri_bo_unreference(avc_surface->dmv_bottom);
771 avc_surface->dmv_bottom = NULL;
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782 struct object_surface *obj_surface,
783 struct intel_encoder_context *encoder_context,
784 struct avc_surface_param *surface_param)
786 struct i965_driver_data *i965 = i965_driver_data(ctx);
787 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
791 struct gen9_surface_avc *avc_surface;
792 int downscaled_width_4x, downscaled_height_4x;
793 int downscaled_width_16x, downscaled_height_16x;
794 int downscaled_width_32x, downscaled_height_32x;
796 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799 int allocate_flag = 1;
802 if (!obj_surface || !obj_surface->bo)
803 return VA_STATUS_ERROR_INVALID_SURFACE;
805 if (obj_surface->private_data) {
806 return VA_STATUS_SUCCESS;
809 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
812 return VA_STATUS_ERROR_ALLOCATION_FAILED;
814 avc_surface->ctx = ctx;
815 obj_surface->private_data = avc_surface;
816 obj_surface->free_private_data = gen9_free_surfaces_avc;
818 downscaled_width_4x = generic_state->frame_width_4x;
819 downscaled_height_4x = generic_state->frame_height_4x;
821 i965_CreateSurfaces(ctx,
823 downscaled_height_4x,
826 &avc_surface->scaled_4x_surface_id);
828 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
830 if (!avc_surface->scaled_4x_surface_obj) {
831 return VA_STATUS_ERROR_ALLOCATION_FAILED;
834 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
837 downscaled_width_16x = generic_state->frame_width_16x;
838 downscaled_height_16x = generic_state->frame_height_16x;
839 i965_CreateSurfaces(ctx,
840 downscaled_width_16x,
841 downscaled_height_16x,
844 &avc_surface->scaled_16x_surface_id);
845 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
847 if (!avc_surface->scaled_16x_surface_obj) {
848 return VA_STATUS_ERROR_ALLOCATION_FAILED;
851 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
854 if (generic_state->b32xme_supported ||
855 generic_state->b32xme_enabled) {
856 downscaled_width_32x = generic_state->frame_width_32x;
857 downscaled_height_32x = generic_state->frame_height_32x;
858 i965_CreateSurfaces(ctx,
859 downscaled_width_32x,
860 downscaled_height_32x,
863 &avc_surface->scaled_32x_surface_id);
864 avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
866 if (!avc_surface->scaled_32x_surface_obj) {
867 return VA_STATUS_ERROR_ALLOCATION_FAILED;
870 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
874 /*mb code and mv data for each frame*/
875 if (!encoder_context->fei_enabled) {
876 size = frame_mb_nums * 16 * 4;
877 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878 &avc_surface->res_mb_code_surface,
882 goto failed_allocation;
884 size = frame_mb_nums * 32 * 4;
885 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886 &avc_surface->res_mv_data_surface,
890 goto failed_allocation;
894 if (avc_state->ref_pic_select_list_supported) {
895 width = ALIGN(frame_width_in_mbs * 8, 64);
896 height = frame_height_in_mbs ;
897 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898 &avc_surface->res_ref_pic_select_surface,
901 "Ref pic select list buffer");
903 goto failed_allocation;
907 avc_surface->dmv_top =
908 dri_bo_alloc(i965->intel.bufmgr,
909 "direct mv top Buffer",
912 avc_surface->dmv_bottom =
913 dri_bo_alloc(i965->intel.bufmgr,
914 "direct mv bottom Buffer",
917 assert(avc_surface->dmv_top);
918 assert(avc_surface->dmv_bottom);
920 return VA_STATUS_SUCCESS;
923 return VA_STATUS_ERROR_ALLOCATION_FAILED;
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928 struct encode_state *encode_state,
929 struct intel_encoder_context *encoder_context)
931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
936 struct i965_gpe_resource *gpe_resource = NULL;
937 VAEncSliceParameterBufferH264 * slice_param = NULL;
938 unsigned int * data = NULL;
939 unsigned int * data_row = NULL;
941 unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
943 if (!avc_state->arbitrary_num_mbs_in_slice)
946 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947 assert(gpe_resource);
949 i965_zero_gpe_resource(gpe_resource);
951 data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
955 for (i = 0; i < avc_state->slice_num; i++) {
956 slice_param = avc_state->slice_param[i];
957 for (j = 0; j < slice_param->num_macroblocks; j++) {
959 if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
967 *data++ = 0xFFFFFFFF;
969 i965_unmap_gpe_resource(gpe_resource);
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974 struct encode_state *encode_state,
975 struct intel_encoder_context *encoder_context)
977 struct i965_driver_data *i965 = i965_driver_data(ctx);
978 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982 unsigned int size = 0;
983 unsigned int width = 0;
984 unsigned int height = 0;
985 unsigned char * data = NULL;
986 int allocate_flag = 1;
989 /*all the surface/buffer are allocated here*/
991 /*second level batch buffer for image state write when cqp etc*/
992 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995 &avc_ctx->res_image_state_batch_buffer_2nd_level,
997 "second levle batch (image state write) buffer");
999 goto failed_allocation;
1001 /* scaling related surface */
1002 if (avc_state->mb_status_supported) {
1003 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006 &avc_ctx->res_mb_status_buffer,
1007 ALIGN(size, 0x1000),
1008 "MB statistics output buffer");
1010 goto failed_allocation;
1011 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1014 if (avc_state->flatness_check_supported) {
1015 width = generic_state->frame_width_in_mbs * 4;
1016 height = generic_state->frame_height_in_mbs * 4;
1017 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019 &avc_ctx->res_flatness_check_surface,
1022 "Flatness check buffer");
1024 goto failed_allocation;
1026 /* me related surface */
1027 width = generic_state->downscaled_width_4x_in_mb * 8;
1028 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031 &avc_ctx->s4x_memv_distortion_buffer,
1034 "4x MEMV distortion buffer");
1036 goto failed_allocation;
1037 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1039 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043 &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1046 "4x MEMV min distortion brc buffer");
1048 goto failed_allocation;
1049 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1052 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056 &avc_ctx->s4x_memv_data_buffer,
1059 "4x MEMV data buffer");
1061 goto failed_allocation;
1062 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1065 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069 &avc_ctx->s16x_memv_data_buffer,
1072 "16x MEMV data buffer");
1074 goto failed_allocation;
1075 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1078 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082 &avc_ctx->s32x_memv_data_buffer,
1085 "32x MEMV data buffer");
1087 goto failed_allocation;
1088 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1091 if (!generic_state->brc_allocated) {
1092 /*brc related surface */
1093 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1095 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096 &avc_ctx->res_brc_history_buffer,
1097 ALIGN(size, 0x1000),
1098 "brc history buffer");
1100 goto failed_allocation;
1102 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1104 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106 ALIGN(size, 0x1000),
1107 "brc pak statistic buffer");
1109 goto failed_allocation;
1111 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114 &avc_ctx->res_brc_image_state_read_buffer,
1115 ALIGN(size, 0x1000),
1116 "brc image state read buffer");
1118 goto failed_allocation;
1120 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123 &avc_ctx->res_brc_image_state_write_buffer,
1124 ALIGN(size, 0x1000),
1125 "brc image state write buffer");
1127 goto failed_allocation;
1129 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130 height = avc_state->brc_const_data_surface_height;
1131 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133 &avc_ctx->res_brc_const_data_buffer,
1136 "brc const data buffer");
1138 goto failed_allocation;
1139 i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1141 if (generic_state->brc_distortion_buffer_supported) {
1142 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148 &avc_ctx->res_brc_dist_data_surface,
1151 "brc dist data buffer");
1153 goto failed_allocation;
1154 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1157 if (generic_state->brc_roi_enable) {
1158 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162 &avc_ctx->res_mbbrc_roi_surface,
1165 "mbbrc roi buffer");
1167 goto failed_allocation;
1168 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1172 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176 &avc_ctx->res_mbbrc_mb_qp_data_surface,
1179 "mbbrc mb qp buffer");
1181 goto failed_allocation;
1183 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184 size = 16 * AVC_QP_MAX * 4;
1185 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186 &avc_ctx->res_mbbrc_const_data_buffer,
1187 ALIGN(size, 0x1000),
1188 "mbbrc const data buffer");
1190 goto failed_allocation;
1192 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194 size = avc_state->mbenc_brc_buffer_size;
1195 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196 &avc_ctx->res_mbenc_brc_buffer,
1197 ALIGN(size, 0x1000),
1198 "mbenc brc buffer");
1200 goto failed_allocation;
1201 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1203 generic_state->brc_allocated = 1;
1207 if (avc_state->mb_qp_data_enable) {
1208 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212 &avc_ctx->res_mb_qp_data_surface,
1215 "external mb qp buffer");
1217 goto failed_allocation;
1220 /* mbenc related surface. it share most of surface with other kernels */
1221 if (avc_state->arbitrary_num_mbs_in_slice) {
1222 width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223 height = generic_state->frame_height_in_mbs ;
1224 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226 &avc_ctx->res_mbenc_slice_map_surface,
1229 "slice map buffer");
1231 goto failed_allocation;
1232 i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1234 /*generate slice map,default one slice per frame.*/
1237 /* sfd related surface */
1238 if (avc_state->sfd_enable) {
1239 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1241 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242 &avc_ctx->res_sfd_output_buffer,
1244 "sfd output buffer");
1246 goto failed_allocation;
1247 i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1249 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250 size = ALIGN(52, 64);
1251 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252 &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1254 "sfd P frame cost table buffer");
1256 goto failed_allocation;
1257 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1259 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1262 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263 size = ALIGN(52, 64);
1264 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265 &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1267 "sfd B frame cost table buffer");
1269 goto failed_allocation;
1270 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1272 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1276 /* wp related surfaces */
1277 if (avc_state->weighted_prediction_supported) {
1278 for (i = 0; i < 2 ; i++) {
1279 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1283 width = generic_state->frame_width_in_pixel;
1284 height = generic_state->frame_height_in_pixel ;
1285 i965_CreateSurfaces(ctx,
1288 VA_RT_FORMAT_YUV420,
1290 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1293 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294 goto failed_allocation;
1297 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1300 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1308 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1310 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311 &avc_ctx->res_mad_data_buffer,
1312 ALIGN(size, 0x1000),
1315 goto failed_allocation;
1317 return VA_STATUS_SUCCESS;
1320 return VA_STATUS_ERROR_ALLOCATION_FAILED;
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1329 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330 VADriverContextP ctx = avc_ctx->ctx;
1333 /* free all the surface/buffer here*/
1334 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1361 for (i = 0; i < 2 ; i++) {
1362 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363 i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364 avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365 avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1369 /* free preenc resources */
1370 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1375 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1378 i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379 avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380 avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1382 i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383 avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384 avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1386 i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387 avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388 avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393 struct intel_encoder_context *encoder_context,
1394 struct i965_gpe_context *gpe_context,
1396 struct gpe_media_object_parameter *param)
1398 struct i965_driver_data *i965 = i965_driver_data(ctx);
1399 struct i965_gpe_table *gpe = &i965->gpe_table;
1400 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1403 struct intel_batchbuffer *batch = encoder_context->base.batch;
1404 struct encoder_status_buffer_internal *status_buffer;
1405 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1410 intel_batchbuffer_start_atomic(batch, 0x1000);
1411 intel_batchbuffer_emit_mi_flush(batch);
1413 status_buffer = &(avc_ctx->status_buffer);
1414 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415 mi_store_data_imm.bo = status_buffer->bo;
1416 mi_store_data_imm.offset = status_buffer->media_index_offset;
1417 mi_store_data_imm.dw0 = media_function;
1418 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1420 gpe->pipeline_setup(ctx, gpe_context, batch);
1421 gpe->media_object(ctx, gpe_context, batch, param);
1422 gpe->media_state_flush(ctx, gpe_context, batch);
1424 gpe->pipeline_end(ctx, gpe_context, batch);
1426 intel_batchbuffer_end_atomic(batch);
1428 intel_batchbuffer_flush(batch);
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433 struct intel_encoder_context *encoder_context,
1434 struct i965_gpe_context *gpe_context,
1436 struct gpe_media_object_walker_parameter *param)
1438 struct i965_driver_data *i965 = i965_driver_data(ctx);
1439 struct i965_gpe_table *gpe = &i965->gpe_table;
1440 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1443 struct intel_batchbuffer *batch = encoder_context->base.batch;
1444 struct encoder_status_buffer_internal *status_buffer;
1445 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1450 intel_batchbuffer_start_atomic(batch, 0x1000);
1452 intel_batchbuffer_emit_mi_flush(batch);
1454 status_buffer = &(avc_ctx->status_buffer);
1455 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456 mi_store_data_imm.bo = status_buffer->bo;
1457 mi_store_data_imm.offset = status_buffer->media_index_offset;
1458 mi_store_data_imm.dw0 = media_function;
1459 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1461 gpe->pipeline_setup(ctx, gpe_context, batch);
1462 gpe->media_object_walker(ctx, gpe_context, batch, param);
1463 gpe->media_state_flush(ctx, gpe_context, batch);
1465 gpe->pipeline_end(ctx, gpe_context, batch);
1467 intel_batchbuffer_end_atomic(batch);
1469 intel_batchbuffer_flush(batch);
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474 struct i965_gpe_context *gpe_context,
1475 struct encoder_kernel_parameter *kernel_param)
1477 struct i965_driver_data *i965 = i965_driver_data(ctx);
1479 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1481 gpe_context->sampler.entry_size = 0;
1482 gpe_context->sampler.max_entries = 0;
1484 if (kernel_param->sampler_size) {
1485 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486 gpe_context->sampler.max_entries = 1;
1489 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1492 gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493 gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495 gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1497 if (i965->intel.eu_total > 0)
1498 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1500 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1502 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505 gpe_context->vfe_state.curbe_allocation_size -
1506 ((gpe_context->idrt.entry_size >> 5) *
1507 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509 gpe_context->vfe_state.gpgpu_mode = 0;
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514 struct encoder_scoreboard_parameter *scoreboard_param)
1516 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1520 if (scoreboard_param->walkpat_flag) {
1521 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522 gpe_context->vfe_desc5.scoreboard0.type = 1;
1524 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
1525 gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
1527 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1528 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
1530 gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
1531 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
1533 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1534 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
1537 gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
1538 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
1541 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1542 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
1545 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
1546 gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
1549 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1550 gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
1553 gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
1554 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
1557 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
1558 gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
1561 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
1562 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1565 gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
1566 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1570 VME pipeline related function
1574 scaling kernel related function
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578 struct encode_state *encode_state,
1579 struct i965_gpe_context *gpe_context,
1580 struct intel_encoder_context *encoder_context,
1583 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584 struct scaling_param *surface_param = (struct scaling_param *)param;
1586 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1591 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1593 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1594 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1596 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1600 curbe_cmd->dw5.flatness_threshold = 128;
1601 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1605 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606 curbe_cmd->dw7.enable_mb_variance_output ||
1607 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1611 i965_gpe_context_unmap_curbe(gpe_context);
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617 struct encode_state *encode_state,
1618 struct i965_gpe_context *gpe_context,
1619 struct intel_encoder_context *encoder_context,
1622 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623 struct scaling_param *surface_param = (struct scaling_param *)param;
1625 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1630 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1632 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1633 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1635 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1638 if (surface_param->enable_mb_flatness_check)
1639 curbe_cmd->dw5.flatness_threshold = 128;
1640 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1645 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646 curbe_cmd->dw6.enable_mb_variance_output ||
1647 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1651 i965_gpe_context_unmap_curbe(gpe_context);
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct i965_gpe_context *gpe_context,
1659 struct intel_encoder_context *encoder_context,
1662 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663 struct scaling_param *surface_param = (struct scaling_param *)param;
1665 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1670 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1672 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1673 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1675 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1678 i965_gpe_context_unmap_curbe(gpe_context);
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684 struct encode_state *encode_state,
1685 struct i965_gpe_context *gpe_context,
1686 struct intel_encoder_context *encoder_context,
1689 struct scaling_param *surface_param = (struct scaling_param *)param;
1690 struct i965_driver_data *i965 = i965_driver_data(ctx);
1691 unsigned int surface_format;
1692 unsigned int res_size;
1694 if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695 surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696 else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697 surface_format = I965_SURFACEFORMAT_R16_UNORM;
1699 surface_format = I965_SURFACEFORMAT_R8_UNORM;
1701 i965_add_2d_gpe_surface(ctx, gpe_context,
1702 surface_param->input_surface,
1703 0, 1, surface_format,
1704 GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1706 i965_add_2d_gpe_surface(ctx, gpe_context,
1707 surface_param->output_surface,
1708 0, 1, surface_format,
1709 GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1711 /*add buffer mv_proc_stat, here need change*/
1712 if (IS_GEN8(i965->intel.device_info)) {
1713 if (surface_param->mbv_proc_stat_enabled) {
1714 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1716 i965_add_buffer_gpe_surface(ctx,
1718 surface_param->pres_mbv_proc_stat_buffer,
1722 GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1724 if (surface_param->enable_mb_flatness_check) {
1725 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726 surface_param->pres_flatness_check_surface,
1728 I965_SURFACEFORMAT_R8_UNORM,
1729 GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1732 if (surface_param->mbv_proc_stat_enabled) {
1733 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1735 i965_add_buffer_gpe_surface(ctx,
1737 surface_param->pres_mbv_proc_stat_buffer,
1741 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742 } else if (surface_param->enable_mb_flatness_check) {
1743 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744 surface_param->pres_flatness_check_surface,
1746 I965_SURFACEFORMAT_R8_UNORM,
1747 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755 struct encode_state *encode_state,
1756 struct intel_encoder_context *encoder_context,
1759 struct i965_driver_data *i965 = i965_driver_data(ctx);
1760 struct i965_gpe_table *gpe = &i965->gpe_table;
1761 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1767 struct i965_gpe_context *gpe_context;
1768 struct scaling_param surface_param;
1769 struct object_surface *obj_surface;
1770 struct gen9_surface_avc *avc_priv_surface;
1771 struct gpe_media_object_walker_parameter media_object_walker_param;
1772 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774 int media_function = 0;
1777 obj_surface = encode_state->reconstructed_object;
1778 avc_priv_surface = obj_surface->private_data;
1780 memset(&surface_param, 0, sizeof(struct scaling_param));
1782 case INTEL_ENC_HME_4x : {
1783 media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1788 surface_param.input_surface = encode_state->input_yuv_object ;
1789 surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790 surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1792 surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793 surface_param.output_frame_width = generic_state->frame_width_4x ;
1794 surface_param.output_frame_height = generic_state->frame_height_4x ;
1796 surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797 surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798 surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1800 surface_param.blk8x8_stat_enabled = 0 ;
1801 surface_param.use_4x_scaling = 1 ;
1802 surface_param.use_16x_scaling = 0 ;
1803 surface_param.use_32x_scaling = 0 ;
1806 case INTEL_ENC_HME_16x : {
1807 media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809 downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810 downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1812 surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813 surface_param.input_frame_width = generic_state->frame_width_4x ;
1814 surface_param.input_frame_height = generic_state->frame_height_4x ;
1816 surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817 surface_param.output_frame_width = generic_state->frame_width_16x ;
1818 surface_param.output_frame_height = generic_state->frame_height_16x ;
1820 surface_param.enable_mb_flatness_check = 0 ;
1821 surface_param.enable_mb_variance_output = 0 ;
1822 surface_param.enable_mb_pixel_average_output = 0 ;
1824 surface_param.blk8x8_stat_enabled = 0 ;
1825 surface_param.use_4x_scaling = 0 ;
1826 surface_param.use_16x_scaling = 1 ;
1827 surface_param.use_32x_scaling = 0 ;
1831 case INTEL_ENC_HME_32x : {
1832 media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833 kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834 downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835 downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1837 surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838 surface_param.input_frame_width = generic_state->frame_width_16x ;
1839 surface_param.input_frame_height = generic_state->frame_height_16x ;
1841 surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842 surface_param.output_frame_width = generic_state->frame_width_32x ;
1843 surface_param.output_frame_height = generic_state->frame_height_32x ;
1845 surface_param.enable_mb_flatness_check = 0 ;
1846 surface_param.enable_mb_variance_output = 0 ;
1847 surface_param.enable_mb_pixel_average_output = 0 ;
1849 surface_param.blk8x8_stat_enabled = 0 ;
1850 surface_param.use_4x_scaling = 0 ;
1851 surface_param.use_16x_scaling = 0 ;
1852 surface_param.use_32x_scaling = 1 ;
1860 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1862 gpe->context_init(ctx, gpe_context);
1863 gpe->reset_binding_table(ctx, gpe_context);
1865 if (surface_param.use_32x_scaling) {
1866 generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1868 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1871 if (surface_param.use_32x_scaling) {
1872 surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873 surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1875 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1879 if (surface_param.use_4x_scaling) {
1880 if (avc_state->mb_status_supported) {
1881 surface_param.enable_mb_flatness_check = 0;
1882 surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883 surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1886 surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887 surface_param.mbv_proc_stat_enabled = 0 ;
1888 surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1892 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1894 /* setup the interface data */
1895 gpe->setup_interface_data(ctx, gpe_context);
1897 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898 if (surface_param.use_32x_scaling) {
1899 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1902 /* the scaling is based on 8x8 blk level */
1903 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1906 kernel_walker_param.no_dependency = 1;
1908 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1910 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1913 &media_object_walker_param);
1915 return VA_STATUS_SUCCESS;
1919 frame/mb brc related function
1922 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1923 struct encode_state *encode_state,
1924 struct intel_encoder_context *encoder_context,
1925 struct gen9_mfx_avc_img_state *pstate)
1927 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1928 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1929 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1931 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1932 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1934 memset(pstate, 0, sizeof(*pstate));
1936 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1937 pstate->dw0.sub_opcode_b = 0;
1938 pstate->dw0.sub_opcode_a = 0;
1939 pstate->dw0.command_opcode = 1;
1940 pstate->dw0.pipeline = 2;
1941 pstate->dw0.command_type = 3;
1943 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1945 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1946 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
1948 pstate->dw3.image_structure = 0;//frame is zero
1949 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1950 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1951 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1952 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1953 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1955 pstate->dw4.field_picture_flag = 0;
1956 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1957 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1958 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1959 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1960 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1961 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1962 pstate->dw4.mb_mv_format_flag = 1;
1963 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1964 pstate->dw4.mv_unpacked_flag = 1;
1965 pstate->dw4.insert_test_flag = 0;
1966 pstate->dw4.load_slice_pointer_flag = 0;
1967 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1968 pstate->dw4.minimum_frame_size = 0;
1969 pstate->dw5.intra_mb_max_bit_flag = 1;
1970 pstate->dw5.inter_mb_max_bit_flag = 1;
1971 pstate->dw5.frame_size_over_flag = 1;
1972 pstate->dw5.frame_size_under_flag = 1;
1973 pstate->dw5.intra_mb_ipcm_flag = 1;
1974 pstate->dw5.mb_rate_ctrl_flag = 0;
1975 pstate->dw5.non_first_pass_flag = 0;
1976 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1977 pstate->dw5.aq_chroma_disable = 1;
1978 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1979 pstate->dw5.aq_enable = avc_state->tq_enable;
1980 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1982 pstate->dw5.aq_rounding = 0;
1985 pstate->dw6.intra_mb_max_size = 2700;
1986 pstate->dw6.inter_mb_max_size = 4095;
1988 pstate->dw8.slice_delta_qp_max0 = 0;
1989 pstate->dw8.slice_delta_qp_max1 = 0;
1990 pstate->dw8.slice_delta_qp_max2 = 0;
1991 pstate->dw8.slice_delta_qp_max3 = 0;
1993 pstate->dw9.slice_delta_qp_min0 = 0;
1994 pstate->dw9.slice_delta_qp_min1 = 0;
1995 pstate->dw9.slice_delta_qp_min2 = 0;
1996 pstate->dw9.slice_delta_qp_min3 = 0;
1998 pstate->dw10.frame_bitrate_min = 0;
1999 pstate->dw10.frame_bitrate_min_unit = 1;
2000 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2001 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2002 pstate->dw10.frame_bitrate_max_unit = 1;
2003 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2005 pstate->dw11.frame_bitrate_min_delta = 0;
2006 pstate->dw11.frame_bitrate_max_delta = 0;
2008 pstate->dw12.vad_error_logic = 1;
2009 /* set paramters DW19/DW20 for slices */
2013 gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
2014 struct encode_state *encode_state,
2015 struct intel_encoder_context *encoder_context,
2016 struct gen8_mfx_avc_img_state *pstate)
2018 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2019 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2020 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2022 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
2023 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2025 memset(pstate, 0, sizeof(*pstate));
2027 pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
2028 pstate->dw0.command_sub_opcode_b = 0;
2029 pstate->dw0.command_sub_opcode_a = 0;
2030 pstate->dw0.command_opcode = 1;
2031 pstate->dw0.command_pipeline = 2;
2032 pstate->dw0.command_type = 3;
2034 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
2036 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
2037 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
2039 pstate->dw3.image_structure = 0;//frame is zero
2040 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
2041 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
2042 pstate->dw3.inter_mb_conf_flag = 0;
2043 pstate->dw3.intra_mb_conf_flag = 0;
2044 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
2045 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
2047 pstate->dw4.field_picture_flag = 0;
2048 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
2049 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
2050 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
2051 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
2052 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2053 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
2054 pstate->dw4.mb_mv_format_flag = 1;
2055 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
2056 pstate->dw4.mv_unpacked_flag = 1;
2057 pstate->dw4.insert_test_flag = 0;
2058 pstate->dw4.load_slice_pointer_flag = 0;
2059 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
2060 pstate->dw4.minimum_frame_size = 0;
2061 pstate->dw5.intra_mb_max_bit_flag = 1;
2062 pstate->dw5.inter_mb_max_bit_flag = 1;
2063 pstate->dw5.frame_size_over_flag = 1;
2064 pstate->dw5.frame_size_under_flag = 1;
2065 pstate->dw5.intra_mb_ipcm_flag = 1;
2066 pstate->dw5.mb_rate_ctrl_flag = 0;
2067 pstate->dw5.non_first_pass_flag = 0;
2068 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
2069 pstate->dw5.aq_chroma_disable = 1;
2070 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
2071 pstate->dw5.aq_enable = avc_state->tq_enable;
2072 pstate->dw5.aq_rounding = avc_state->tq_rounding;
2074 pstate->dw5.aq_rounding = 0;
2077 pstate->dw6.intra_mb_max_size = 2700;
2078 pstate->dw6.inter_mb_max_size = 4095;
2080 pstate->dw8.slice_delta_qp_max0 = 0;
2081 pstate->dw8.slice_delta_qp_max1 = 0;
2082 pstate->dw8.slice_delta_qp_max2 = 0;
2083 pstate->dw8.slice_delta_qp_max3 = 0;
2085 pstate->dw9.slice_delta_qp_min0 = 0;
2086 pstate->dw9.slice_delta_qp_min1 = 0;
2087 pstate->dw9.slice_delta_qp_min2 = 0;
2088 pstate->dw9.slice_delta_qp_min3 = 0;
2090 pstate->dw10.frame_bitrate_min = 0;
2091 pstate->dw10.frame_bitrate_min_unit = 1;
2092 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2093 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2094 pstate->dw10.frame_bitrate_max_unit = 1;
2095 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2097 pstate->dw11.frame_bitrate_min_delta = 0;
2098 pstate->dw11.frame_bitrate_max_delta = 0;
2099 /* set paramters DW19/DW20 for slices */
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102 struct encode_state *encode_state,
2103 struct intel_encoder_context *encoder_context,
2104 struct i965_gpe_resource *gpe_resource)
2106 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2110 unsigned int * data;
2111 struct gen9_mfx_avc_img_state cmd;
2113 pdata = i965_map_gpe_resource(gpe_resource);
2118 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119 for (i = 0; i < generic_state->num_pak_passes; i++) {
2122 cmd.dw4.macroblock_stat_enable = 0;
2123 cmd.dw5.non_first_pass_flag = 0;
2125 cmd.dw4.macroblock_stat_enable = 1;
2126 cmd.dw5.non_first_pass_flag = 1;
2127 cmd.dw5.intra_mb_ipcm_flag = 1;
2130 cmd.dw5.mb_rate_ctrl_flag = 0;
2131 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133 *data = MI_BATCH_BUFFER_END;
2135 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2137 i965_unmap_gpe_resource(gpe_resource);
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142 struct encode_state *encode_state,
2143 struct intel_encoder_context *encoder_context,
2144 struct i965_gpe_resource *gpe_resource)
2146 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2150 unsigned int * data;
2151 struct gen8_mfx_avc_img_state cmd;
2153 pdata = i965_map_gpe_resource(gpe_resource);
2158 gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159 for (i = 0; i < generic_state->num_pak_passes; i++) {
2162 cmd.dw4.macroblock_stat_enable = 0;
2163 cmd.dw5.non_first_pass_flag = 0;
2165 cmd.dw4.macroblock_stat_enable = 1;
2166 cmd.dw5.non_first_pass_flag = 1;
2167 cmd.dw5.intra_mb_ipcm_flag = 1;
2168 cmd.dw3.inter_mb_conf_flag = 1;
2169 cmd.dw3.intra_mb_conf_flag = 1;
2171 cmd.dw5.mb_rate_ctrl_flag = 0;
2172 memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173 data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174 *data = MI_BATCH_BUFFER_END;
2176 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2178 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_set_image_state_non_brc:
 * Emit one gen9 MFX AVC image-state command, terminated by
 * MI_BATCH_BUFFER_END, into @gpe_resource.  Used when BRC multi-pass is
 * not driving PAK: only the state for the current pass
 * (generic_state->curr_pak_pass) is written, not one copy per pass.
 * NOTE(review): this listing has extraction gaps (missing braces and the
 * declarations of "pdata"); all code lines are kept verbatim.
 */
2182 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
2183 struct encode_state *encode_state,
2184 struct intel_encoder_context *encoder_context,
2185 struct i965_gpe_resource *gpe_resource)
2187 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2188 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2191 unsigned int * data;
2192 struct gen9_mfx_avc_img_state cmd;
/* CPU-map the destination buffer before writing the command. */
2194 pdata = i965_map_gpe_resource(gpe_resource);
/* Start from the default gen9 image state for the current frame. */
2199 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
/* First pass: no MB statistics, not flagged as a re-pass. */
2201 if (generic_state->curr_pak_pass == 0) {
2202 cmd.dw4.macroblock_stat_enable = 0;
2203 cmd.dw5.non_first_pass_flag = 0;
/* Later passes: enable MB statistics and allow IPCM fallback for intra
 * MBs.  NOTE(review): non_first_pass_flag stays 0 here even on re-pass,
 * unlike the multi-pass BRC variant earlier in this file which sets it
 * to 1 — presumably intentional for the non-BRC path; confirm against
 * the full source. */
2206 cmd.dw4.macroblock_stat_enable = 1;
2207 cmd.dw5.non_first_pass_flag = 0;
2208 cmd.dw5.intra_mb_ipcm_flag = 1;
/* MB-level rate control is a BRC feature, so it is disabled here. */
2211 cmd.dw5.mb_rate_ctrl_flag = 0;
2212 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
/* Terminate the second-level batch right after the image state. */
2213 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2214 *data = MI_BATCH_BUFFER_END;
2216 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen95_avc_calc_lambda_table:
 * Build the Gen9.5 trellis-quantization lambda LUT
 * (avc_state->lamda_value_lut).  The base table for the current slice
 * type (I/P/B) is copied in, then sentinel entries are patched: each
 * 32-bit entry packs the intra lambda in the high 16 bits and the inter
 * lambda in the low 16 bits.
 * NOTE(review): extraction gaps — the declarations of "i"/"col", some
 * braces and break statements are missing; code kept verbatim.
 */
2221 gen95_avc_calc_lambda_table(VADriverContextP ctx,
2222 struct encode_state *encode_state,
2223 struct intel_encoder_context *encoder_context)
2225 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2226 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2227 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2228 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2229 unsigned int value, inter, intra;
2230 unsigned int rounding_value = 0;
2231 unsigned int size = 0;
2234 unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
/* size is already a byte count: AVC_QP_MAX rows x 2 columns of 32-bit
 * entries (the later "* sizeof(unsigned char)" multiplies by 1). */
2240 size = AVC_QP_MAX * 2 * sizeof(unsigned int);
/* Select the base lambda table for the current slice type. */
2241 switch (generic_state->frame_type) {
2243 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
2246 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
2249 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
/* Walk every entry and replace sentinel lambdas with rounding values. */
2256 for (i = 0; i < AVC_QP_MAX ; i++) {
2257 for (col = 0; col < 2; col++) {
2258 value = *(lambda_table + i * 2 + col);
2259 intra = value >> 16;
/* Intra sentinel 0xfffa -> default trellis-quant intra rounding. */
2261 if (intra < GEN95_AVC_MAX_LAMBDA) {
2262 if (intra == 0xfffa) {
2263 intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
2267 intra = intra << 16;
2268 inter = value & 0xffff;
/* Inter sentinel 0xffef -> per-slice-type inter rounding: use the
 * user-supplied rounding when valid, otherwise the preset default. */
2270 if (inter < GEN95_AVC_MAX_LAMBDA) {
2271 if (inter == 0xffef) {
2272 if (generic_state->frame_type == SLICE_TYPE_P) {
2273 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
2274 rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
2276 rounding_value = avc_state->rounding_inter_p;
2277 } else if (generic_state->frame_type == SLICE_TYPE_B) {
/* B frames used as references get a dedicated rounding table. */
2278 if (pic_param->pic_fields.bits.reference_pic_flag) {
2279 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
2280 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
2282 rounding_value = avc_state->rounding_inter_b_ref;
2284 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
2285 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
2287 rounding_value = avc_state->rounding_inter_b;
2291 inter = 0xf000 + rounding_value;
/* Re-pack intra (high 16 bits) and inter (low 16 bits). */
2293 *(lambda_table + i * 2 + col) = intra + inter;
/*
 * gen9_avc_init_brc_const_data:
 * Populate the BRC constant-data surface (res_brc_const_data_buffer)
 * consumed by the BRC kernels: QP-adjustment/distortion-threshold
 * tables, skip-value thresholds, reference-list entries, mode/MV cost
 * tables, ref costs and intra scaling factors.  On KBL/GLK/Gen10 the
 * Gen9.5 lambda and FTQ25 tables are appended as well.
 * NOTE(review): extraction gaps — the "data += size" advances between
 * sections, "i", and several braces are missing from this listing.
 */
2299 gen9_avc_init_brc_const_data(VADriverContextP ctx,
2300 struct encode_state *encode_state,
2301 struct intel_encoder_context *encoder_context)
2303 struct i965_driver_data *i965 = i965_driver_data(ctx);
2304 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2305 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2306 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2307 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2309 struct i965_gpe_resource *gpe_resource = NULL;
2310 unsigned char * data = NULL;
2311 unsigned char * data_tmp = NULL;
2312 unsigned int size = 0;
2313 unsigned int table_idx = 0;
2314 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2317 struct object_surface *obj_surface;
2318 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2319 VASurfaceID surface_id;
2320 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2322 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2323 assert(gpe_resource);
/* Clear the whole surface first so untouched sections read as zero. */
2325 i965_zero_gpe_resource(gpe_resource);
2327 data = i965_map_gpe_resource(gpe_resource);
/* Map frame type (I/P/B) to the row index used by the cost tables. */
2330 table_idx = slice_type_kernel[generic_state->frame_type];
2332 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2333 size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2334 memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2338 /* skip threshold table*/
2340 switch (generic_state->frame_type) {
2342 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2345 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2348 /*SLICE_TYPE_I,no change */
/* Optional user-supplied non-FTQ skip thresholds override every other
 * byte (offset 1, stride 2) of the skip-value table for P/B frames. */
2352 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2353 for (i = 0; i < AVC_QP_MAX ; i++) {
2354 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2359 /*fill the qp for ref list*/
/* Layout: 32 bytes for L0, 32 unused, 32 bytes for L1, 160 padding;
 * L0/L1 areas are pre-filled with 0xff (invalid). */
2360 size = 32 + 32 + 32 + 160;
2361 memset(data, 0xff, 32);
2362 memset(data + 32 + 32, 0xff, 32);
2363 switch (generic_state->frame_type) {
2364 case SLICE_TYPE_P: {
2365 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2366 surface_id = slice_param->RefPicList0[i].picture_id;
2367 obj_surface = SURFACE(surface_id);
2370 *(data + i) = avc_state->list_ref_idx[0][i]; // NOTE(review): original "?" marker — stores the ref index itself; confirm the kernel expects this rather than a QP
2374 case SLICE_TYPE_B: {
/* For B frames fill L1 first (offset 64), then rewind and fill L0. */
2375 data = data + 32 + 32;
2376 for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
2377 surface_id = slice_param->RefPicList1[i].picture_id;
2378 obj_surface = SURFACE(surface_id);
2381 *(data + i) = avc_state->list_ref_idx[1][i]; // NOTE(review): original "?" marker — see L0 note above
2384 data = data - 32 - 32;
2386 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2387 surface_id = slice_param->RefPicList0[i].picture_id;
2388 obj_surface = SURFACE(surface_id);
2391 *(data + i) = avc_state->list_ref_idx[0][i]; // NOTE(review): original "?" marker — see note above
2396 /*SLICE_TYPE_I,no change */
2401 /*mv cost and mode cost*/
2403 memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy mode-cost override: patch byte 3 of each 32-byte QP row. */
2405 if (avc_state->old_mode_cost_enable) {
2407 for (i = 0; i < AVC_QP_MAX ; i++) {
2408 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* User-supplied FTQ skip thresholds land in bytes 24,25,27..31 of each
 * 32-byte QP row (byte 26 is intentionally untouched here). */
2413 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2414 for (i = 0; i < AVC_QP_MAX ; i++) {
2415 *(data + (i * 32) + 24) =
2416 *(data + (i * 32) + 25) =
2417 *(data + (i * 32) + 27) =
2418 *(data + (i * 32) + 28) =
2419 *(data + (i * 32) + 29) =
2420 *(data + (i * 32) + 30) =
2421 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2429 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
/* Intra scaling factors: adaptive variant when enabled. */
2434 if (avc_state->adaptive_intra_scaling_enable) {
2435 memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2437 memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
/* Gen9.5-class hardware additionally needs lambda and FTQ25 tables. */
2440 if (IS_KBL(i965->intel.device_info) ||
2441 IS_GEN10(i965->intel.device_info) ||
2442 IS_GLK(i965->intel.device_info)) {
2446 memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
2450 memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
2453 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_init_brc_const_data_old:
 * Legacy variant of gen9_avc_init_brc_const_data used when multi-ref
 * support (multi_pre_enable) is off: fills the BRC constant-data surface
 * from the older gen75 QP-adjustment and mode/MV cost tables, and skips
 * the reference-list section entirely (comment placeholder only).
 * NOTE(review): extraction gaps — "i", the "data" advances between
 * sections and several braces are missing; code kept verbatim.
 */
2457 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2458 struct encode_state *encode_state,
2459 struct intel_encoder_context *encoder_context)
2461 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2462 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2463 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2464 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2466 struct i965_gpe_resource *gpe_resource = NULL;
2467 unsigned int * data = NULL;
2468 unsigned int * data_tmp = NULL;
2469 unsigned int size = 0;
2470 unsigned int table_idx = 0;
2471 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2472 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2475 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2476 assert(gpe_resource);
/* Zero the surface so untouched sections are deterministic. */
2478 i965_zero_gpe_resource(gpe_resource);
2480 data = i965_map_gpe_resource(gpe_resource);
/* Map frame type (I/P/B) to the cost-table row index. */
2483 table_idx = slice_type_kernel[generic_state->frame_type];
2485 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2486 size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2487 memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2491 /* skip threshold table*/
2493 switch (generic_state->frame_type) {
2495 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2498 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2501 /*SLICE_TYPE_I,no change */
/* Optional user non-FTQ skip thresholds override every other byte
 * (offset 1, stride 2) of the skip table for P/B frames.
 * NOTE(review): "data" is unsigned int* here (unlike the newer variant
 * where it is unsigned char*), so this indexing touches different
 * bytes; confirm against the full source. */
2505 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2506 for (i = 0; i < AVC_QP_MAX ; i++) {
2507 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2512 /*fill the qp for ref list*/
2518 /*mv cost and mode cost*/
2520 memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy mode-cost override: patch entry 3 of each QP row. */
2522 if (avc_state->old_mode_cost_enable) {
2524 for (i = 0; i < AVC_QP_MAX ; i++) {
2525 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* User-supplied FTQ skip thresholds fill slots 24,25,27..31 of each
 * 32-entry QP row (slot 26 untouched, mirroring the newer variant). */
2530 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2531 for (i = 0; i < AVC_QP_MAX ; i++) {
2532 *(data + (i * 32) + 24) =
2533 *(data + (i * 32) + 25) =
2534 *(data + (i * 32) + 27) =
2535 *(data + (i * 32) + 28) =
2536 *(data + (i * 32) + 29) =
2537 *(data + (i * 32) + 30) =
2538 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2546 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2548 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_set_curbe_brc_init_reset:
 * Fill the CURBE for the BRC init/reset kernel: frame geometry, bit-rate
 * targets, VBV buffer sizes, rate-control-mode flags (CBR/VBR/AVBR) and
 * the deviation-threshold tables derived from the bits-per-frame ratio
 * via pow().  The same CURBE layout serves both the init and the reset
 * kernel (selected by the caller).
 * NOTE(review): extraction gaps — braces, the early-return on a NULL
 * curbe map, and some else-branches are missing; code kept verbatim.
 */
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552 struct encode_state *encode_state,
2553 struct i965_gpe_context *gpe_context,
2554 struct intel_encoder_context *encoder_context,
2557 gen9_avc_brc_init_reset_curbe_data *cmd;
2558 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561 double input_bits_per_frame = 0;
2562 double bps_ratio = 0;
2563 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564 struct avc_param common_param;
2566 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Start from the canned default CURBE and patch per-stream fields. */
2571 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2573 memset(&common_param, 0, sizeof(common_param));
2574 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578 common_param.frames_per_100s = generic_state->frames_per_100s;
2579 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580 common_param.target_bit_rate = generic_state->target_bit_rate;
/* Cap derived from the H.264 profile/level of the stream. */
2582 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
/* Rates are given in kbps by the state; the kernel wants bps. */
2585 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
/* Derive P/B counts per GOP from the GOP size and ref distance. */
2587 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591 cmd->dw12.no_slices = avc_state->slice_num;
2594 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
/* NOTE(review): self-assignment below is a no-op as shown; the full
 * source may clamp max_bit_rate from VUI here — confirm upstream. */
2595 cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2596 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
/* Frame rate is carried as frames_per_100s / 100. */
2602 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2603 cmd->dw7.frame_rate_d = 100;
2604 cmd->dw8.brc_flag = 0;
/* Bit 15 set means "MB-level BRC disabled". */
2605 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
/* Per-mode flags: CBR forces max == average; VBR widens max if needed;
 * AVBR pins max to average and relies on accuracy/convergence. */
2608 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2610 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2613 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2615 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2618 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2620 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2622 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2626 // ICQ/VCM/QVBR rate-control modes are intentionally not handled here
2628 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
/* Average bits per frame = max_rate / fps.  (Stray ";;" is harmless.) */
2632 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
/* Default the VBV size/fullness when the app did not provide them,
 * then clamp fullness into [2 frames, buffer size]. */
2634 if (cmd->dw2.buf_size_in_bits == 0) {
2635 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2638 if (cmd->dw1.init_buf_full_in_bits == 0) {
2639 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2641 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2644 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
/* AVBR overrides the buffer with 2x target rate, 3/4 full. */
2649 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2651 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
/* bps_ratio scales the deviation thresholds; clamped to [0.1, 3.5]. */
2655 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
/* Deviation thresholds (negative values rely on unsigned wraparound,
 * matching the kernel's expected encoding). */
2659 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2664 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2666 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2667 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2680 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2681 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2684 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2686 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_brc_init_reset:
 * Bind the two surfaces the BRC init/reset kernel reads and writes:
 * the BRC history buffer and the 2D distortion data surface.
 * NOTE(review): extraction gaps — some argument lines of the two
 * binding calls are missing from this listing; code kept verbatim.
 */
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693 struct encode_state *encode_state,
2694 struct i965_gpe_context *gpe_context,
2695 struct intel_encoder_context *encoder_context,
2698 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
/* BRC history: persistent state carried across frames by the kernel. */
2701 i965_add_buffer_gpe_surface(ctx,
2703 &avc_ctx->res_brc_history_buffer,
2705 avc_ctx->res_brc_history_buffer.size,
2707 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
/* 2D distortion surface produced by the ME stage. */
2709 i965_add_buffer_2d_gpe_surface(ctx,
2711 &avc_ctx->res_brc_dist_data_surface,
2713 I965_SURFACEFORMAT_R8_UNORM,
2714 GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/*
 * gen9_avc_kernel_brc_init_reset:
 * Run the BRC init (first time) or BRC reset (subsequent, when
 * generic_state->brc_inited is set) GPGPU kernel: set up its CURBE and
 * surface bindings via the generic context callbacks, then dispatch a
 * single media object.  Returns VA_STATUS_SUCCESS.
 */
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721 struct encode_state *encode_state,
2722 struct intel_encoder_context *encoder_context)
2724 struct i965_driver_data *i965 = i965_driver_data(ctx);
2725 struct i965_gpe_table *gpe = &i965->gpe_table;
2726 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2731 struct i965_gpe_context *gpe_context;
2732 struct gpe_media_object_parameter media_object_param;
2733 struct gpe_media_object_inline_data media_object_inline_data;
2734 int media_function = 0;
2735 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2737 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
/* After the first init, subsequent calls use the reset kernel. */
2739 if (generic_state->brc_inited)
2740 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2742 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2744 gpe->context_init(ctx, gpe_context);
2745 gpe->reset_binding_table(ctx, gpe_context);
/* CURBE and surface setup are delegated to per-generation callbacks. */
2747 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2749 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2751 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object; the inline data is zeroed (unused payload). */
2753 memset(&media_object_param, 0, sizeof(media_object_param));
2754 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755 media_object_param.pinline_data = &media_object_inline_data;
2756 media_object_param.inline_size = sizeof(media_object_inline_data);
2758 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2761 &media_object_param);
2763 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_set_curbe_brc_frame_update:
 * Fill the CURBE for the frame-level BRC update kernel: running target
 * buffer fullness, skip-frame accounting, per-frame-type min/max QP,
 * AVBR gain-adjust thresholds and the profile/level frame-size cap.
 * Also advances generic_state->brc_init_current_target_buf_full_in_bits
 * by one frame's budget as a side effect.
 * NOTE(review): extraction gaps — braces, early returns and some
 * else/default lines are missing from this listing; code kept verbatim.
 */
2767 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2768 struct encode_state *encode_state,
2769 struct i965_gpe_context *gpe_context,
2770 struct intel_encoder_context *encoder_context,
2773 gen9_avc_frame_brc_update_curbe_data *cmd;
2774 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2775 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2777 struct object_surface *obj_surface;
2778 struct gen9_surface_avc *avc_priv_surface;
2779 struct avc_param common_param;
2780 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2782 obj_surface = encode_state->reconstructed_object;
/* Bail out when the reconstructed surface has no AVC private data. */
2784 if (!obj_surface || !obj_surface->private_data)
2786 avc_priv_surface = obj_surface->private_data;
2788 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Start from the canned default CURBE and patch per-frame fields. */
2793 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
/* Wrap the running fullness when it exceeds the VBV buffer size. */
2795 cmd->dw5.target_size_flag = 0 ;
2796 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2798 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2799 cmd->dw5.target_size_flag = 1 ;
/* Account for frames the app asked to skip (field name typo
 * "skip_frame_enbale" is declared elsewhere; cannot rename here). */
2802 if (generic_state->skip_frame_enbale) {
2803 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2804 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2806 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2809 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2810 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header size is tracked in bytes; the kernel expects bits (<<3). */
2811 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2812 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2813 cmd->dw5.brc_flag = 0 ;
2814 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2816 if (avc_state->multi_pre_enable) {
2817 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2818 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; // NOTE(review): original comment said "do not know this. use -1" — frame_idx is used as the QP index; confirm
2821 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Per-frame-type QP clamps; zero (no clamp) when the feature is off. */
2822 if (avc_state->min_max_qp_enable) {
2823 switch (generic_state->frame_type) {
2825 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2826 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2829 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2830 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2833 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2834 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2838 cmd->dw6.minimum_qp = 0 ;
2839 cmd->dw6.maximum_qp = 0 ;
2841 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2842 cmd->dw6.enable_sliding_window = 0 ;
/* Budget one more frame's worth of bits for the next update. */
2844 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR only: gain-adjust frame thresholds and rate-ratio bands scaled
 * by convergence/accuracy. */
2846 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2847 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2848 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2849 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2850 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2851 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2852 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2853 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2854 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2855 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2856 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2859 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2861 memset(&common_param, 0, sizeof(common_param));
2862 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2863 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2864 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2865 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2866 common_param.frames_per_100s = generic_state->frames_per_100s;
2867 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2868 common_param.target_bit_rate = generic_state->target_bit_rate;
/* Upper bound on frame size from the stream's profile/level. */
2870 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2871 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_brc_frame_update:
 * Bind all surfaces for the frame-level BRC update kernel: BRC history,
 * previous-PAK statistics, image-state read/write buffers, the MBENC
 * CURBE (either a dedicated buffer on Gen9.5+, or the MBENC context's
 * dynamic-state bo on Gen9), the ME distortion surface, the BRC
 * constant-data surface and the MB status / MB-QP surfaces.  Binding
 *-table indices differ between Gen9 and Gen9.5-class parts (is_g95).
 * NOTE(review): extraction gaps — several call-argument lines and the
 * braces/else of the is_g95 selection are missing; code kept verbatim.
 */
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878 struct encode_state *encode_state,
2879 struct i965_gpe_context *gpe_context,
2880 struct intel_encoder_context *encoder_context,
2883 struct i965_driver_data *i965 = i965_driver_data(ctx);
2884 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886 struct brc_param * param = (struct brc_param *)param_brc ;
2887 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2888 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889 unsigned char is_g95 = 0;
/* SKL/BXT/BDW use the Gen9 binding table; KBL/GLK/Gen10 use Gen9.5. */
2891 if (IS_SKL(i965->intel.device_info) ||
2892 IS_BXT(i965->intel.device_info) ||
2893 IS_GEN8(i965->intel.device_info))
2895 else if (IS_KBL(i965->intel.device_info) ||
2896 IS_GEN10(i965->intel.device_info) ||
2897 IS_GLK(i965->intel.device_info))
2900 /* brc history buffer*/
2901 i965_add_buffer_gpe_surface(ctx,
2903 &avc_ctx->res_brc_history_buffer,
2905 avc_ctx->res_brc_history_buffer.size,
2907 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2909 /* previous pak buffer*/
2910 i965_add_buffer_gpe_surface(ctx,
2912 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2914 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2916 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2918 /* image state command buffer read only*/
2919 i965_add_buffer_gpe_surface(ctx,
2921 &avc_ctx->res_brc_image_state_read_buffer,
2923 avc_ctx->res_brc_image_state_read_buffer.size,
2925 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2927 /* image state command buffer write only*/
2928 i965_add_buffer_gpe_surface(ctx,
2930 &avc_ctx->res_brc_image_state_write_buffer,
2932 avc_ctx->res_brc_image_state_write_buffer.size,
2934 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
/* Gen9.5+: dedicated MBENC-BRC buffer; otherwise bind the MBENC
 * context's dynamic state (CURBE) for read and write. */
2936 if (avc_state->mbenc_brc_buffer_size > 0) {
2937 i965_add_buffer_gpe_surface(ctx,
2939 &(avc_ctx->res_mbenc_brc_buffer),
2941 avc_ctx->res_mbenc_brc_buffer.size,
2943 GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2945 /* Mbenc curbe input buffer */
2946 gen9_add_dri_buffer_gpe_surface(ctx,
2948 gpe_context_mbenc->dynamic_state.bo,
2950 ALIGN(gpe_context_mbenc->curbe.length, 64),
2951 gpe_context_mbenc->curbe.offset,
2952 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953 /* Mbenc curbe output buffer */
2954 gen9_add_dri_buffer_gpe_surface(ctx,
2956 gpe_context_mbenc->dynamic_state.bo,
2958 ALIGN(gpe_context_mbenc->curbe.length, 64),
2959 gpe_context_mbenc->curbe.offset,
2960 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2963 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964 i965_add_buffer_2d_gpe_surface(ctx,
2966 &avc_ctx->res_brc_dist_data_surface,
2968 I965_SURFACEFORMAT_R8_UNORM,
2969 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2971 /* BRC const data 2D surface buffer */
2972 i965_add_buffer_2d_gpe_surface(ctx,
2974 &avc_ctx->res_brc_const_data_buffer,
2976 I965_SURFACEFORMAT_R8_UNORM,
2977 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2979 /* MB statistical data surface*/
2980 if (!IS_GEN8(i965->intel.device_info)) {
2981 i965_add_buffer_gpe_surface(ctx,
2983 &avc_ctx->res_mb_status_buffer,
2985 avc_ctx->res_mb_status_buffer.size,
2987 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2989 i965_add_buffer_2d_gpe_surface(ctx,
2991 &avc_ctx->res_mbbrc_mb_qp_data_surface,
2993 I965_SURFACEFORMAT_R8_UNORM,
2994 GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX); /* NOTE(review): MB-QP surface bound at the MB_STATUS index — looks like it should use an MB_QP binding index; confirm against upstream */
/*
 * gen9_avc_kernel_brc_frame_update:
 * Run the frame-level BRC update kernel.  First the MBENC CURBE for the
 * upcoming MBENC pass is prepared (the BRC kernel patches it), then the
 * BRC constant-data surface and image-state buffer are (re)built, the
 * BRC update CURBE and surfaces are set, and a single media object is
 * dispatched.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): extraction gaps — declarations of "kernel_idx", several
 * braces/breaks, and the lines selecting the P/B mbenc kernel offset
 * are missing; code kept verbatim.
 */
3000 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
3001 struct encode_state *encode_state,
3002 struct intel_encoder_context *encoder_context)
3005 struct i965_driver_data *i965 = i965_driver_data(ctx);
3006 struct i965_gpe_table *gpe = &i965->gpe_table;
3007 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3008 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3009 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3010 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3011 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3013 struct i965_gpe_context *gpe_context = NULL;
3014 struct gpe_media_object_parameter media_object_param;
3015 struct gpe_media_object_inline_data media_object_inline_data;
3016 int media_function = 0;
3018 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
3019 unsigned int brc_enabled = 0;
3020 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* "&& (0)" deliberately disables dirty-ROI handling here. */
3021 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3023 /* the following set the mbenc curbe*/
3024 struct mbenc_param curbe_mbenc_param ;
3025 struct brc_param curbe_brc_param ;
/* Decide whether the MBENC constant-data / MB-QP surfaces are needed. */
3027 mb_const_data_buffer_in_use =
3028 generic_state->mb_brc_enabled ||
3031 avc_state->mb_qp_data_enable ||
3032 avc_state->rolling_intra_refresh_enable;
3033 mb_qp_buffer_in_use =
3034 generic_state->mb_brc_enabled ||
3035 generic_state->brc_roi_enable ||
3036 avc_state->mb_qp_data_enable;
/* Pick the MBENC kernel variant (I-frame base; P/B offsets applied in
 * lines missing from this listing). */
3038 switch (generic_state->kernel_mode) {
3039 case INTEL_ENC_KERNEL_NORMAL : {
3040 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3043 case INTEL_ENC_KERNEL_PERFORMANCE : {
3044 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3047 case INTEL_ENC_KERNEL_QUALITY : {
3048 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
3056 if (generic_state->frame_type == SLICE_TYPE_P) {
3058 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3062 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
3063 gpe->context_init(ctx, gpe_context);
3065 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
3067 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
3068 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
3069 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
3070 curbe_mbenc_param.brc_enabled = brc_enabled;
3071 curbe_mbenc_param.roi_enabled = roi_enable;
3073 /* set curbe mbenc*/
3074 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
3076 // gen95 set curbe out of the brc. gen9 do it here
3077 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
3078 /*begin brc frame update*/
3079 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
3080 curbe_brc_param.gpe_context_mbenc = gpe_context;
3081 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
/* Switch gpe_context from the MBENC kernel to the BRC update kernel. */
3082 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
3083 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3084 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
3086 gpe->context_init(ctx, gpe_context);
3087 gpe->reset_binding_table(ctx, gpe_context);
3088 /*brc copy ignored*/
3090 /* set curbe frame update*/
3091 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3093 /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
3094 if (avc_state->multi_pre_enable) {
3095 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
3097 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
3099 /* image state construct*/
3100 if (IS_GEN8(i965->intel.device_info)) {
3101 gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3103 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3105 /* set surface frame mbenc*/
3106 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3109 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object; inline data zeroed (unused payload). */
3111 memset(&media_object_param, 0, sizeof(media_object_param));
3112 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
3113 media_object_param.pinline_data = &media_object_inline_data;
3114 media_object_param.inline_size = sizeof(media_object_inline_data);
3116 gen9_avc_run_kernel_media_object(ctx, encoder_context,
3119 &media_object_param);
3121 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_set_curbe_brc_mb_update:
 * Fill the CURBE (kernel constant data) for the MB-level BRC update
 * kernel: map the GPE context's curbe buffer, zero it, record the
 * current frame type and whether BRC ROI is enabled, then unmap.
 * NOTE(review): this is a sampled listing -- the return-type line, the
 * post-map NULL check and several closing braces are not visible here.
 */
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126 struct encode_state *encode_state,
3127 struct i965_gpe_context *gpe_context,
3128 struct intel_encoder_context *encoder_context,
/* Curbe layout specific to the MB BRC update kernel. */
3131 gen9_avc_mb_brc_curbe_data *cmd;
3132 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* Map the curbe so the CPU can write the kernel constants. */
3135 cmd = i965_gpe_context_map_curbe(gpe_context);
3140 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3142 cmd->dw0.cur_frame_type = generic_state->frame_type;
3143 if (generic_state->brc_roi_enable) {
3144 cmd->dw0.enable_roi = 1;
/* else branch (its 'else' line is not visible in this listing): ROI off. */
3146 cmd->dw0.enable_roi = 0;
/* Unmap before the kernel is dispatched. */
3149 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_brc_mb_update:
 * Bind the surfaces read/written by the MB-level BRC update kernel to
 * the GPE binding table: BRC history buffer, (conditionally) the MB QP
 * data surface and the BRC ROI surface, and the MB status buffer.
 * NOTE(review): sampled listing -- several argument lines of the
 * i965_add_buffer*_gpe_surface() calls and closing braces are missing.
 */
3155 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3156 struct encode_state *encode_state,
3157 struct i965_gpe_context *gpe_context,
3158 struct intel_encoder_context *encoder_context,
3161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3162 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3163 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3165 /* brc history buffer*/
3166 i965_add_buffer_gpe_surface(ctx,
3168 &avc_ctx->res_brc_history_buffer,
3170 avc_ctx->res_brc_history_buffer.size,
3172 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3174 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
/* Only bound when MB-level BRC is enabled. */
3175 if (generic_state->mb_brc_enabled) {
3176 i965_add_buffer_2d_gpe_surface(ctx,
3178 &avc_ctx->res_mbbrc_mb_qp_data_surface,
3180 I965_SURFACEFORMAT_R8_UNORM,
3181 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3185 /* BRC roi feature*/
3186 if (generic_state->brc_roi_enable) {
3187 i965_add_buffer_gpe_surface(ctx,
3189 &avc_ctx->res_mbbrc_roi_surface,
3191 avc_ctx->res_mbbrc_roi_surface.size,
3193 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3197 /* MB statistical data surface*/
3198 i965_add_buffer_gpe_surface(ctx,
3200 &avc_ctx->res_mb_status_buffer,
3202 avc_ctx->res_mb_status_buffer.size,
3204 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/*
 * gen9_avc_kernel_brc_mb_update:
 * Dispatch the MB-level BRC update kernel: select the BRC_MB_UPDATE
 * kernel's GPE context, set its curbe and surfaces via the generic_ctx
 * function pointers, set up interface data, and run it as a media
 * object walker over the frame at 8x8-block (half-MB) resolution.
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): sampled listing -- the return-type line and some
 * argument lines of the walker dispatch are not visible here.
 */
3210 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3211 struct encode_state *encode_state,
3212 struct intel_encoder_context *encoder_context)
3215 struct i965_driver_data *i965 = i965_driver_data(ctx);
3216 struct i965_gpe_table *gpe = &i965->gpe_table;
3217 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3218 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3219 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3220 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3222 struct i965_gpe_context *gpe_context;
3223 struct gpe_media_object_walker_parameter media_object_walker_param;
3224 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3225 int media_function = 0;
/* Select the MB BRC update kernel from the BRC kernel group. */
3228 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3229 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3230 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3232 gpe->context_init(ctx, gpe_context);
3233 gpe->reset_binding_table(ctx, gpe_context);
3235 /* set curbe brc mb update*/
3236 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3239 /* set surface brc mb update*/
3240 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3243 gpe->setup_interface_data(ctx, gpe_context);
3245 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3246 /* the scaling is based on 8x8 blk level */
/* Round up: each walker step covers a 2x2 MB region. */
3247 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3248 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3249 kernel_walker_param.no_dependency = 1;
3251 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3253 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3256 &media_object_walker_param);
3258 return VA_STATUS_SUCCESS;
3262 mbenc kernel related functions; this section also includes the intra distortion kernel
/*
 * gen9_avc_get_biweight:
 * Derive the bi-prediction weight for B frames. Defaults to 32 (equal
 * weighting). For implicit weighted bi-prediction the weight is derived
 * from the list0 ref0 distance scale factor, then forced back to 32 if
 * it is not one of the values the VME hardware supports.
 * NOTE(review): sampled listing -- the else/brace lines and the final
 * 'return biweight;' are not visible here.
 */
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3267 int biweight = 32; // default value
3269 /* based on kernel HLD*/
3270 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
/* Implicit mode: weight = (DistScaleFactor + 2) >> 2, per kernel HLD. */
3273 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
/* VME only supports this fixed set of weights. */
3275 if (biweight != 16 && biweight != 21 &&
3276 biweight != 32 && biweight != 43 && biweight != 48) {
3277 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
/*
 * gen9_avc_get_dist_scale_factor:
 * Compute the temporal direct-mode distance scale factors for list0
 * references, per H.264 spec 8.4.2.2.2:
 *   tx = (16384 + |td/2|) / td; DistScaleFactor = Clip3(-1024, 1023, (tb*tx+32)>>6)
 * where tb/td are clipped POC distances. Results are stored in
 * avc_state->dist_scale_factor_list0[].
 * NOTE(review): sampled listing -- the 'break' statements, the tb/td
 * assignments and the td==0 guard are on lines not visible here.
 */
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286 struct encode_state *encode_state,
3287 struct intel_encoder_context *encoder_context)
3289 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3294 int max_num_references;
3295 VAPictureH264 *curr_pic;
3296 VAPictureH264 *ref_pic_l0;
3297 VAPictureH264 *ref_pic_l1;
3306 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3308 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309 curr_pic = &pic_param->CurrPic;
3310 for (i = 0; i < max_num_references; i++) {
3311 ref_pic_l0 = &(slice_param->RefPicList0[i]);
/* Stop at the first invalid / unset list0 entry. */
3313 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3316 ref_pic_l1 = &(slice_param->RefPicList1[0]);
/* Bug fix: validate the just-fetched list1 picture. The original
 * re-tested ref_pic_l0 here (copy/paste), so an invalid ref_pic_l1
 * was never caught before being dereferenced below. */
3317 if ((ref_pic_l1->flags & VA_PICTURE_H264_INVALID) ||
3318 (ref_pic_l1->picture_id == VA_INVALID_SURFACE))
/* POC distances: current-to-ref0 (tb) and ref1-to-ref0 (td), clipped. */
3321 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323 CLIP(poc0, -128, 127);
3324 CLIP(poc1, -128, 127);
/* tx = (16384 + |td/2|) / td, rounded division per the spec. */
3331 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332 tx = (16384 + tmp) / td ;
3333 tmp = (tb * tx + 32) >> 6;
3334 CLIP(tmp, -1024, 1023);
3335 avc_state->dist_scale_factor_list0[i] = tmp;
/*
 * gen9_avc_get_qp_from_ref_list:
 * Look up the QP that was used to encode a reference picture: resolve
 * the surface id from RefPicList0 or RefPicList1 at ref_frame_idx, then
 * read qp_value from the surface's private gen9_surface_avc data.
 * NOTE(review): sampled listing -- the list-selection parameter lines,
 * the else/fallback branches and the out-of-range return path are not
 * visible here.
 */
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342 VAEncSliceParameterBufferH264 *slice_param,
3346 struct i965_driver_data *i965 = i965_driver_data(ctx);
3347 struct object_surface *obj_surface;
3348 struct gen9_surface_avc *avc_priv_surface;
3349 VASurfaceID surface_id;
3351 assert(slice_param);
/* Index must be within the active list0 range. */
3355 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
/* Index must be within the active list1 range. */
3360 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3365 obj_surface = SURFACE(surface_id);
/* QP is cached in the per-surface private encoder data. */
3366 if (obj_surface && obj_surface->private_data) {
3367 avc_priv_surface = obj_surface->private_data;
3368 return avc_priv_surface->qp_value;
/*
 * gen9_avc_load_mb_brc_const_data:
 * Populate the MB BRC constant-data buffer: copy the per-slice-type
 * base table (16 dwords per QP, 52 QP entries), then patch selected
 * dwords per QP depending on frame type and encoder state (mode cost,
 * skip-value LUTs, intra scaling, FTQ thresholds, trellis lambdas).
 * NOTE(review): sampled listing -- case labels, per-iteration 'data'
 * pointer advancement and several braces are on lines not visible here.
 */
3375 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
3376 struct encode_state *encode_state,
3377 struct intel_encoder_context *encoder_context)
3379 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3380 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3381 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3382 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3384 struct i965_gpe_resource *gpe_resource = NULL;
3385 unsigned int * data = NULL;
3386 unsigned int * data_tmp = NULL;
/* 16 dwords per QP entry x 52 QP values. */
3387 unsigned int size = 16 * 52;
3388 unsigned int table_idx = 0;
3389 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
3390 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
3393 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3394 assert(gpe_resource);
3395 data = i965_map_gpe_resource(gpe_resource);
/* Base table selected by I/P/B slice type. */
3398 table_idx = slice_type_kernel[generic_state->frame_type];
3400 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3404 switch (generic_state->frame_type) {
/* I-frame case (label line not visible): optionally use legacy intra mode costs. */
3406 for (i = 0; i < AVC_QP_MAX ; i++) {
3407 if (avc_state->old_mode_cost_enable)
3408 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* P/B case: patch MV cost, skip value and intra scaling per QP. */
3414 for (i = 0; i < AVC_QP_MAX ; i++) {
3415 if (generic_state->frame_type == SLICE_TYPE_P) {
3416 if (avc_state->skip_bias_adjustment_enable)
3417 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
/* Dword 9: skip value -- caller-supplied LUT wins over the built-in tables. */
3419 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3420 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3421 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3422 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3424 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
/* Dword 10: intra cost scaling factor, adaptive variant optional. */
3427 if (avc_state->adaptive_intra_scaling_enable) {
3428 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3430 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* Common per-QP patch loop (applies to all slice types). */
3442 for (i = 0; i < AVC_QP_MAX ; i++) {
/* Dwords 6/7: replicate the caller-supplied FTQ skip threshold into byte lanes. */
3443 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3444 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3445 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3446 (avc_state->ftq_skip_threshold_lut[i] << 24));
3447 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3448 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3449 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3450 (avc_state->ftq_skip_threshold_lut[i] << 24));
/* Dwords 11/12: trellis quantization lambda values. */
3453 if (avc_state->kernel_trellis_enable) {
3454 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3455 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3461 i965_unmap_gpe_resource(gpe_resource);
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466 struct encode_state *encode_state,
3467 struct i965_gpe_context *gpe_context,
3468 struct intel_encoder_context *encoder_context,
3471 struct i965_driver_data *i965 = i965_driver_data(ctx);
3473 gen9_avc_mbenc_curbe_data *g9;
3474 gen95_avc_mbenc_curbe_data *g95;
3476 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3480 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3482 VASurfaceID surface_id;
3483 struct object_surface *obj_surface;
3485 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486 unsigned char qp = 0;
3487 unsigned char me_method = 0;
3488 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489 unsigned int table_idx = 0;
3490 unsigned char is_g9 = 0;
3491 unsigned char is_g95 = 0;
3492 unsigned int curbe_size = 0;
3494 unsigned int preset = generic_state->preset;
3495 if (IS_SKL(i965->intel.device_info) ||
3496 IS_BXT(i965->intel.device_info)) {
3497 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3501 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502 memset(cmd.g9, 0, curbe_size);
3504 if (mbenc_i_frame_dist_in_use) {
3505 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3508 switch (generic_state->frame_type) {
3510 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3513 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3516 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3523 } else if (IS_KBL(i965->intel.device_info) ||
3524 IS_GEN10(i965->intel.device_info) ||
3525 IS_GLK(i965->intel.device_info)) {
3526 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3530 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531 memset(cmd.g9, 0, curbe_size);
3533 if (mbenc_i_frame_dist_in_use) {
3534 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3537 switch (generic_state->frame_type) {
3539 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3542 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3545 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3553 /* Never get here, just silence a gcc warning */
3559 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3562 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3567 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568 cmd.g9->dw38.max_len_sp = 0;
3571 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3573 cmd.g9->dw3.src_access = 0;
3574 cmd.g9->dw3.ref_access = 0;
3576 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577 //disable ftq_override by now.
3578 if (avc_state->ftq_override) {
3579 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3582 // both gen9 and gen95 come here by now
3583 if (generic_state->frame_type == SLICE_TYPE_P) {
3584 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3587 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3591 cmd.g9->dw3.ftq_enable = 0;
3594 if (avc_state->disable_sub_mb_partion)
3595 cmd.g9->dw3.sub_mb_part_mask = 0x7;
3597 if (mbenc_i_frame_dist_in_use) {
3598 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601 cmd.g9->dw6.batch_buffer_end = 0;
3602 cmd.g9->dw31.intra_compute_type = 1;
3605 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3610 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612 } else if (avc_state->skip_bias_adjustment_enable) {
3613 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614 // No need to check for P picture as the flag is only enabled for P picture */
3615 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3620 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3623 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626 cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3633 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634 cmd.g9->dw7.src_field_polarity = 0;//field related
3636 /*ftq_skip_threshold_lut set,dw14 /15*/
3638 /*r5 disable NonFTQSkipThresholdLUT*/
3639 if (generic_state->frame_type == SLICE_TYPE_P) {
3640 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3642 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3647 cmd.g9->dw13.qp_prime_y = qp;
3648 cmd.g9->dw13.qp_prime_cb = qp;
3649 cmd.g9->dw13.qp_prime_cr = qp;
3650 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3652 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653 switch (gen9_avc_multi_pred[preset]) {
3655 cmd.g9->dw32.mult_pred_l0_disable = 128;
3656 cmd.g9->dw32.mult_pred_l1_disable = 128;
3659 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660 cmd.g9->dw32.mult_pred_l1_disable = 128;
3663 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3667 cmd.g9->dw32.mult_pred_l0_disable = 1;
3668 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3674 cmd.g9->dw32.mult_pred_l0_disable = 128;
3675 cmd.g9->dw32.mult_pred_l1_disable = 128;
3678 /*field setting for dw33 34, ignored*/
3680 if (avc_state->adaptive_transform_decision_enable) {
3681 if (generic_state->frame_type != SLICE_TYPE_I) {
3683 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684 cmd.g9->dw58.mb_texture_threshold = 1024;
3685 cmd.g9->dw58.tx_decision_threshold = 128;
3686 } else if (is_g95) {
3687 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688 cmd.g95->dw60.mb_texture_threshold = 1024;
3689 cmd.g95->dw60.tx_decision_threshold = 128;
3695 if (generic_state->frame_type == SLICE_TYPE_B) {
3696 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3701 cmd.g9->dw34.b_original_bff = 0; //frame only
3702 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3708 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3713 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3715 if (cmd.g9->dw34.force_non_skip_check) {
3716 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3721 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722 cmd.g9->dw38.ref_threshold = 400;
3723 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3725 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3726 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3730 if (mbenc_i_frame_dist_in_use) {
3731 cmd.g9->dw13.qp_prime_y = 0;
3732 cmd.g9->dw13.qp_prime_cb = 0;
3733 cmd.g9->dw13.qp_prime_cr = 0;
3734 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3739 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3752 table_idx = slice_type_kernel[generic_state->frame_type];
3753 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3755 if (generic_state->frame_type == SLICE_TYPE_I) {
3756 cmd.g9->dw0.skip_mode_enable = 0;
3757 cmd.g9->dw37.skip_mode_enable = 0;
3758 cmd.g9->dw36.hme_combine_overlap = 0;
3759 cmd.g9->dw47.intra_cost_sf = 16;
3760 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3762 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3764 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766 cmd.g9->dw3.bme_disable_fbr = 1;
3767 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769 cmd.g9->dw7.non_skip_zmv_added = 1;
3770 cmd.g9->dw7.non_skip_mode_added = 1;
3771 cmd.g9->dw7.skip_center_mask = 1;
3772 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774 cmd.g9->dw36.hme_combine_overlap = 1;
3775 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3784 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786 cmd.g9->dw3.search_ctrl = 7;
3787 cmd.g9->dw3.skip_type = 1;
3788 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790 cmd.g9->dw7.skip_center_mask = 0xff;
3791 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793 cmd.g9->dw36.hme_combine_overlap = 1;
3794 surface_id = slice_param->RefPicList1[0].picture_id;
3795 obj_surface = SURFACE(surface_id);
3797 WARN_ONCE("Invalid backward reference frame\n");
3800 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3802 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3815 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817 cmd.g9->dw7.non_skip_zmv_added = 1;
3818 cmd.g9->dw7.non_skip_mode_added = 1;
3821 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3827 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3829 if (avc_state->rolling_intra_refresh_enable) {
3830 /*by now disable it*/
3831 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832 cmd.g9->dw32.mult_pred_l0_disable = 128;
3833 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834 across one P frame to another P frame, as needed by the RollingI algo */
3836 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3842 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843 cmd.g95->dw4.enable_intra_refresh = 0;
3844 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3848 cmd.g95->dw4.enable_intra_refresh = 1;
3849 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3858 cmd.g9->dw34.widi_intra_refresh_en = 0;
3861 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3863 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3865 cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3867 /*roi set disable by now. 49-56*/
3868 if (curbe_param->roi_enabled) {
3869 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3870 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3871 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3872 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3874 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3875 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3876 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3877 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3879 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3880 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3881 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3882 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3884 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3885 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3886 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3887 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3889 if (!generic_state->brc_enabled) {
3891 tmp = generic_state->roi[0].value;
3892 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894 tmp = generic_state->roi[1].value;
3895 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897 tmp = generic_state->roi[2].value;
3898 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900 tmp = generic_state->roi[3].value;
3901 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3904 cmd.g9->dw34.roi_enable_flag = 0;
3909 if (avc_state->tq_enable) {
3910 if (generic_state->frame_type == SLICE_TYPE_I) {
3911 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3914 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3919 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3923 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3926 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3929 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3932 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3938 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3963 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3987 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_fei_set_curbe_mbenc:
 * Fill the MBEnc kernel CURBE (constant URB entry) for the FEI
 * (Flexible Encoding Infrastructure) AVC encode path on Gen9.
 * The curbe is mapped from @gpe_context, seeded from per-slice-type
 * init tables, then overridden from the application-supplied FEI frame
 * control parameters (search window, SAD modes, MV predictors, ...),
 * and finally unmapped.
 *
 * NOTE(review): source lines appear elided in this sampled view (gaps in
 * the embedded line numbers), so some branches/case labels are not
 * visible here; comments below describe only what the visible code shows.
 */
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994 struct encode_state *encode_state,
3995 struct i965_gpe_context *gpe_context,
3996 struct intel_encoder_context *encoder_context,
3999 struct i965_driver_data *i965 = i965_driver_data(ctx);
4000 gen9_avc_fei_mbenc_curbe_data *cmd;
4001 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004 VASurfaceID surface_id;
4005 struct object_surface *obj_surface;
4006 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
4008 VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4010 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011 unsigned char qp = 0;
4012 unsigned char me_method = 0;
4013 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014 unsigned int table_idx = 0;
4015 int ref_width, ref_height, len_sp;
4016 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018 unsigned int preset = generic_state->preset;
/* Frame-level QP = picture init QP plus the first slice's delta. */
4020 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4022 assert(gpe_context != NULL);
4023 cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024 memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* Seed the curbe from the init table matching the frame type (or the
 * I-frame-distortion table when that special pass is in use). */
4026 if (mbenc_i_frame_dist_in_use) {
4027 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4030 switch (generic_state->frame_type) {
4032 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4035 memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4038 memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* ME method derived from the predefined search-window enum:
 * windows 5 and 8 use full search (4), everything else diamond (6). */
4045 /* 4 means full search, 6 means diamand search */
4046 me_method = (fei_param->search_window == 5) ||
4047 (fei_param->search_window == 8) ? 4 : 6;
4049 ref_width = fei_param->ref_width;
4050 ref_height = fei_param->ref_height;
4051 len_sp = fei_param->len_sp;
4052 /* If there is a serch_window, discard user provided ref_width, ref_height
4053 * and search_path length */
4054 switch (fei_param->search_window) {
/* search_window == 0: caller supplies an explicit search_path plus
 * ref_width/ref_height, both of which are validated here. */
4056 /* not use predefined search window, there should be a search_path input */
4057 if ((fei_param->search_path != 0) &&
4058 (fei_param->search_path != 1) &&
4059 (fei_param->search_path != 2)) {
4060 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
4063 /* 4 means full search, 6 means diamand search */
4064 me_method = (fei_param->search_path == 1) ? 6 : 4;
4065 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066 WARN_ONCE("Invalid input ref_width/ref_height in"
4067 "SearchWindow=0 case! \n");
/* Predefined search-window presets (case bodies elided in this view);
 * each case presumably fixes ref_width/ref_height/len_sp — confirm in
 * the full source. */
4073 /* Tiny - 4 SUs 24x24 window */
4080 /* Small - 9 SUs 28x28 window */
4086 /* Diamond - 16 SUs 48x40 window */
4092 /* Large Diamond - 32 SUs 48x40 window */
4098 /* Exhaustive - 48 SUs 48x40 window */
4104 /* Diamond - 16 SUs 64x32 window */
4110 /* Large Diamond - 32 SUs 64x32 window */
4116 /* Exhaustive - 48 SUs 64x32 window */
/* Clamp the reference window to the hardware limits; B frames search
 * two directions so each direction is capped at 32x32. */
4126 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4128 CLIP(ref_width, 4, 32);
4129 CLIP(ref_height, 4, 32);
4130 } else if (is_pframe) {
4131 CLIP(ref_width, 4, 64);
4132 CLIP(ref_height, 4, 32);
/* Generic motion-estimation controls shared by all frame types. */
4135 cmd->dw0.adaptive_enable =
4136 cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137 cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138 = avc_state->transform_8x8_mode_enable;
4139 cmd->dw2.max_len_sp = len_sp;
4140 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142 cmd->dw3.src_access =
4143 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
/* FTQ (fast transform quantization) skip: only for non-I frames, either
 * forced by the override flag or taken from the per-preset table. */
4145 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146 if (avc_state->ftq_override) {
4147 cmd->dw3.ft_enable = avc_state->ftq_enable;
4149 if (generic_state->frame_type == SLICE_TYPE_P) {
4150 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4152 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4156 cmd->dw3.ft_enable = 0;
4159 if (avc_state->disable_sub_mb_partion)
4160 cmd->dw3.sub_mb_part_mask = 0x7;
/* Picture geometry: the I-frame-distortion pass works on the 4x
 * downscaled surface; the normal path uses the full-resolution frame
 * and the application's FEI per-frame controls. */
4162 if (mbenc_i_frame_dist_in_use) {
4163 /* Fixme: Not supported, no brc in fei */
4165 cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166 cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168 cmd->dw6.batch_buffer_end = 0;
4169 cmd->dw31.intra_compute_type = 1;
4172 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173 cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175 generic_state->frame_height_in_mbs : avc_state->slice_height;
4176 cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177 cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178 cmd->dw3.inter_sad = fei_param->inter_sad;
4179 cmd->dw3.Intra_sad = fei_param->intra_sad;
4180 cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181 cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182 cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183 avc_state->sfd_enable && generic_state->hme_enabled;
4184 cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185 cmd->dw4.constrained_intra_pred_flag =
4186 pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187 cmd->dw4.hme_enable = 0;
4188 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189 cmd->dw4.use_actual_ref_qp_value =
4190 generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191 cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192 cmd->dw7.src_field_polarity = 0;
/* Mode/MV cost table, indexed by kernel slice type and QP. */
4195 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197 // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198 } else if (avc_state->skip_bias_adjustment_enable) {
4199 // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200 // No need to check for P picture as the flag is only enabled for P picture
4201 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4205 /* search path tables */
4206 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4209 //ftq_skip_threshold_lut set,dw14 /15
4211 //r5 disable NonFTQSkipThresholdLUT
/* Per-slice-type skip threshold, keyed by block-based-skip and 8x8
 * transform enables plus QP. */
4212 if (generic_state->frame_type == SLICE_TYPE_P) {
4213 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215 cmd->dw32.skip_val =
4216 gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4218 cmd->dw13.qp_prime_y = qp;
4219 cmd->dw13.qp_prime_cb = qp;
4220 cmd->dw13.qp_prime_cr = qp;
4221 cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
/* Multi-reference prediction: 0x01 enables, 0x80 disables each list. */
4223 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224 cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225 cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4228 cmd->dw32.mult_pred_l0_disable = 0x80;
4229 cmd->dw32.mult_pred_l1_disable = 0x80;
4231 /* no field pic setting, not supported */
4234 if (avc_state->adaptive_transform_decision_enable) {
4235 if (generic_state->frame_type != SLICE_TYPE_I) {
4236 cmd->dw34.enable_adaptive_tx_decision = 1;
4239 cmd->dw58.mb_texture_threshold = 1024;
4240 cmd->dw58.tx_decision_threshold = 128;
4242 if (generic_state->frame_type == SLICE_TYPE_B) {
4243 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244 cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4247 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248 cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4254 if (cmd->dw34.force_non_skip_check) {
4255 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4257 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258 cmd->dw38.ref_threshold = 400;
4259 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260 // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
4261 // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262 // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
/* The distortion-only pass zeroes QP and intra penalties. */
4264 if (mbenc_i_frame_dist_in_use) {
4265 cmd->dw13.qp_prime_y = 0;
4266 cmd->dw13.qp_prime_cb = 0;
4267 cmd->dw13.qp_prime_cr = 0;
4268 cmd->dw33.intra_16x16_nondc_penalty = 0;
4269 cmd->dw33.intra_8x8_nondc_penalty = 0;
4270 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Actual per-reference QP values (up to 8 refs in L0, 2 in L1). */
4272 if (cmd->dw4.use_actual_ref_qp_value) {
4273 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4285 table_idx = slice_type_kernel[generic_state->frame_type];
4286 cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* Slice-type-specific overrides: I, P, then B. */
4288 if (generic_state->frame_type == SLICE_TYPE_I) {
4289 cmd->dw0.skip_mode_enable = 0;
4290 cmd->dw37.skip_mode_enable = 0;
4291 cmd->dw36.hme_combine_overlap = 0;
4292 cmd->dw36.check_all_fractional_enable = 0;
4293 cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294 cmd->dw34.enable_direct_bias_adjustment = 0;
4295 cmd->dw34.enable_global_motion_bias_adjustment = 0;
4297 } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299 cmd->dw3.bme_disable_fbr = 1;
4300 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302 cmd->dw7.non_skip_zmv_added = 1;
4303 cmd->dw7.non_skip_mode_added = 1;
4304 cmd->dw7.skip_center_mask = 1;
4306 cmd->dw47.intra_cost_sf =
4307 (avc_state->adaptive_intra_scaling_enable) ?
4308 gen9_avc_adaptive_intra_scaling_factor[preset] :
4309 gen9_avc_intra_scaling_factor[preset];
4311 cmd->dw47.max_vmv_r =
4312 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4314 cmd->dw36.hme_combine_overlap = 1;
4315 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317 cmd->dw34.enable_direct_bias_adjustment = 0;
4318 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319 if (avc_state->global_motion_bias_adjustment_enable)
4320 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4322 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4324 } else { /* B slice */
4326 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327 cmd->dw1.bi_Weight = avc_state->bi_weight;
4328 cmd->dw3.search_ctrl = 7;
4329 cmd->dw3.skip_type = 1;
4330 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332 cmd->dw7.skip_center_mask = 0xff;
4334 cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335 gen9_avc_adaptive_intra_scaling_factor[qp] :
4336 gen9_avc_intra_scaling_factor[qp];
4338 cmd->dw47.max_vmv_r =
4339 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4341 cmd->dw36.hme_combine_overlap = 1;
/* Validate the first backward reference; on failure the curbe is
 * unmapped before bailing out (early-exit path partly elided here). */
4343 //check is_fwd_frame_short_term_ref
4344 surface_id = slice_param->RefPicList1[0].picture_id;
4345 obj_surface = SURFACE(surface_id);
4347 WARN_ONCE("Invalid backward reference frame\n");
4349 i965_gpe_context_unmap_curbe(gpe_context);
4352 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4354 cmd->dw36.num_ref_idx_l0_minus_one =
4355 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4357 cmd->dw36.num_ref_idx_l1_minus_one =
4358 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4360 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
/* Temporal distance scale factors for implicit weighted prediction. */
4362 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4371 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372 if (cmd->dw34.enable_direct_bias_adjustment) {
4373 cmd->dw7.non_skip_mode_added = 1;
4374 cmd->dw7.non_skip_zmv_added = 1;
4377 cmd->dw34.enable_global_motion_bias_adjustment =
4378 avc_state->global_motion_bias_adjustment_enable;
4379 if (avc_state->global_motion_bias_adjustment_enable)
4380 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4382 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383 cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
/* Record the kernel's block-based-skip decision back into driver state
 * so later passes (e.g. PAK) stay consistent with this curbe. */
4386 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4388 if (avc_state->rolling_intra_refresh_enable) {
4390 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4393 cmd->dw34.widi_intra_refresh_en = 0;
4395 cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396 cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4398 /* Fixme: Skipped ROI stuffs for now */
4400 /* r64: FEI specific fields */
4401 cmd->dw64.fei_enable = 1;
4402 cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403 if (fei_param->distortion != VA_INVALID_ID)
4404 cmd->dw64.vme_distortion_output_enable = 1;
4405 cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406 cmd->dw64.mb_input_enable = fei_param->mb_input;
4408 // FEI mode is disabled when external MVP is available
4409 if (fei_param->mv_predictor_enable)
4410 cmd->dw64.fei_mode = 0;
4412 cmd->dw64.fei_mode = 1;
/* Binding-table indices so the kernel can locate each bound surface. */
4414 cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415 cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416 cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417 cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418 cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419 cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420 cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421 cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422 cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423 cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424 cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425 cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426 cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427 cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428 cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429 cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430 cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431 cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432 cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433 cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434 cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435 cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436 cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_mbenc:
 * Bind every input/output surface the MBEnc kernel needs into the GPE
 * context's binding table: PAK object and MV buffers, current and
 * reference YUV surfaces (VME), HME MV/distortion data, MBBRC constants,
 * per-MB QP, statistics, MAD, BRC-curbe, slice map, skip map and SFD
 * cost tables.
 *
 * NOTE(review): source lines appear elided in this sampled view (gaps in
 * the embedded line numbers) — in particular the multi-line argument
 * lists of the i965_add_*_gpe_surface() calls and several else/closing
 * braces are not visible; comments describe only the visible code.
 */
4443 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
4444 struct encode_state *encode_state,
4445 struct i965_gpe_context *gpe_context,
4446 struct intel_encoder_context *encoder_context,
4449 struct i965_driver_data *i965 = i965_driver_data(ctx);
4450 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4451 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4452 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4453 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4454 struct object_surface *obj_surface;
4455 struct gen9_surface_avc *avc_priv_surface;
4456 struct i965_gpe_resource *gpe_resource;
4457 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
4458 VASurfaceID surface_id;
4459 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
4460 unsigned int size = 0;
4461 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
4462 generic_state->frame_height_in_mbs;
4464 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4465 unsigned char is_g95 = 0;
/* Gen9 (SKL/BXT) vs Gen9.5 (KBL/GLK/Gen10) selects different binding
 * table indices further below (assignment bodies elided in this view). */
4467 if (IS_SKL(i965->intel.device_info) ||
4468 IS_BXT(i965->intel.device_info))
4470 else if (IS_KBL(i965->intel.device_info) ||
4471 IS_GEN10(i965->intel.device_info) ||
4472 IS_GLK(i965->intel.device_info))
4475 obj_surface = encode_state->reconstructed_object;
4477 if (!obj_surface || !obj_surface->private_data)
4479 avc_priv_surface = obj_surface->private_data;
4481 /*pak obj command buffer output*/
4482 size = frame_mb_size * 16 * 4;
4483 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4484 i965_add_buffer_gpe_surface(ctx,
4490 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4492 /*mv data buffer output*/
4493 size = frame_mb_size * 32 * 4;
4494 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4495 i965_add_buffer_gpe_surface(ctx,
4501 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
/* Current source surface: the I-frame-distortion pass uses the 4x
 * downscaled reconstruction, otherwise the input YUV surface. */
4503 /*input current YUV surface, current input Y/UV object*/
4504 if (mbenc_i_frame_dist_in_use) {
4505 obj_surface = encode_state->reconstructed_object;
4506 if (!obj_surface || !obj_surface->private_data)
4508 avc_priv_surface = obj_surface->private_data;
4509 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4511 obj_surface = encode_state->input_yuv_object;
4513 i965_add_2d_gpe_surface(ctx,
4518 I965_SURFACEFORMAT_R8_UNORM,
4519 GEN9_AVC_MBENC_CURR_Y_INDEX);
4521 i965_add_2d_gpe_surface(ctx,
4526 I965_SURFACEFORMAT_R16_UINT,
4527 GEN9_AVC_MBENC_CURR_UV_INDEX);
/* HME outputs feed MBEnc: 4x MV data plus distortion. Gen8 binds them
 * only for non-I frames; later gens bind unconditionally. */
4529 if (generic_state->hme_enabled) {
4531 if (!IS_GEN8(i965->intel.device_info)) {
4532 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4533 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4536 I965_SURFACEFORMAT_R8_UNORM,
4537 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4538 /* memv distortion input*/
4539 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4540 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4543 I965_SURFACEFORMAT_R8_UNORM,
4544 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4545 } else if (generic_state->frame_type != SLICE_TYPE_I) {
4546 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4547 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4550 I965_SURFACEFORMAT_R8_UNORM,
4551 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4552 /* memv distortion input*/
4553 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4554 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4557 I965_SURFACEFORMAT_R8_UNORM,
4558 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4562 /*mbbrc const data_buffer*/
4563 if (param->mb_const_data_buffer_in_use) {
4564 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
4565 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
4566 i965_add_buffer_gpe_surface(ctx,
4572 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
/* Per-MB QP: app-supplied map when enabled, else the MBBRC-generated one. */
4576 /*mb qp data_buffer*/
4577 if (param->mb_qp_buffer_in_use) {
4578 if (avc_state->mb_qp_data_enable)
4579 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
4581 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
4582 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4585 I965_SURFACEFORMAT_R8_UNORM,
4586 GEN9_AVC_MBENC_MBQP_INDEX);
/* VME current picture + L0 reference list (interleaved at odd slots). */
4589 /*input current YUV surface, current input Y/UV object*/
4590 if (mbenc_i_frame_dist_in_use) {
4591 obj_surface = encode_state->reconstructed_object;
4592 if (!obj_surface || !obj_surface->private_data)
4594 avc_priv_surface = obj_surface->private_data;
4595 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4597 obj_surface = encode_state->input_yuv_object;
4599 i965_add_adv_gpe_surface(ctx, gpe_context,
4601 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
4602 /*input ref YUV surface*/
4603 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4604 surface_id = slice_param->RefPicList0[i].picture_id;
4605 obj_surface = SURFACE(surface_id);
4606 if (!obj_surface || !obj_surface->private_data)
4609 i965_add_adv_gpe_surface(ctx, gpe_context,
4611 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
/* VME current picture again for the L1 (backward) side, then the single
 * supported L1 reference plus its MB-code/MV buffers for B frames. */
4613 /*input current YUV surface, current input Y/UV object*/
4614 if (mbenc_i_frame_dist_in_use) {
4615 obj_surface = encode_state->reconstructed_object;
4616 if (!obj_surface || !obj_surface->private_data)
4618 avc_priv_surface = obj_surface->private_data;
4619 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4621 obj_surface = encode_state->input_yuv_object;
4623 i965_add_adv_gpe_surface(ctx, gpe_context,
4625 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
4627 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4628 if (i > 0) break; // only one ref supported here for B frame
4629 surface_id = slice_param->RefPicList1[i].picture_id;
4630 obj_surface = SURFACE(surface_id);
4631 if (!obj_surface || !obj_surface->private_data)
4634 i965_add_adv_gpe_surface(ctx, gpe_context,
4636 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
4637 i965_add_adv_gpe_surface(ctx, gpe_context,
4639 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
/* Forward-frame MB data and MV data taken from the L1 ref's private
 * surfaces (used for B-frame direct/skip derivation). */
4641 avc_priv_surface = obj_surface->private_data;
4642 /*pak obj command buffer output(mb code)*/
4643 size = frame_mb_size * 16 * 4;
4644 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4645 i965_add_buffer_gpe_surface(ctx,
4651 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
4653 /*mv data buffer output*/
4654 size = frame_mb_size * 32 * 4;
4655 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4656 i965_add_buffer_gpe_surface(ctx,
4662 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
4666 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
4667 i965_add_adv_gpe_surface(ctx, gpe_context,
4669 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
4674 /* BRC distortion data buffer for I frame*/
4675 if (mbenc_i_frame_dist_in_use) {
4676 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
4677 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4680 I965_SURFACEFORMAT_R8_UNORM,
4681 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
4684 /* as ref frame ,update later RefPicSelect of Current Picture*/
4685 obj_surface = encode_state->reconstructed_object;
4686 avc_priv_surface = obj_surface->private_data;
4687 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
4688 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
4689 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4692 I965_SURFACEFORMAT_R8_UNORM,
4693 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
/* MB statistics slot: post-Gen8 prefers the vproc MB status buffer,
 * falling back to the flatness-check surface; Gen8 only has the latter. */
4696 if (!IS_GEN8(i965->intel.device_info)) {
4697 if (param->mb_vproc_stats_enable) {
4698 /*mb status buffer input*/
4699 size = frame_mb_size * 16 * 4;
4700 gpe_resource = &(avc_ctx->res_mb_status_buffer);
4701 i965_add_buffer_gpe_surface(ctx,
4707 GEN9_AVC_MBENC_MB_STATS_INDEX);
4709 } else if (avc_state->flatness_check_enable) {
4710 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4711 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4714 I965_SURFACEFORMAT_R8_UNORM,
4715 GEN9_AVC_MBENC_MB_STATS_INDEX);
4717 } else if (avc_state->flatness_check_enable) {
4718 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4719 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4722 I965_SURFACEFORMAT_R8_UNORM,
4723 GEN9_AVC_MBENC_MB_STATS_INDEX);
/* MAD (mean absolute difference) accumulator — zeroed before the run. */
4726 if (param->mad_enable) {
4727 /*mad buffer input*/
4729 gpe_resource = &(avc_ctx->res_mad_data_buffer);
4730 i965_add_buffer_gpe_surface(ctx,
4736 GEN9_AVC_MBENC_MAD_DATA_INDEX);
4737 i965_zero_gpe_resource(gpe_resource);
4740 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
4741 if (avc_state->mbenc_brc_buffer_size > 0) {
4742 size = avc_state->mbenc_brc_buffer_size;
4743 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
4744 i965_add_buffer_gpe_surface(ctx,
4750 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
4753 /*artitratry num mbs in slice*/
4754 if (avc_state->arbitrary_num_mbs_in_slice) {
4755 /*slice surface input*/
4756 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
4757 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4760 I965_SURFACEFORMAT_R8_UNORM,
4761 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
4762 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
/* Force-non-skip MB map (skip-disable feature), normal encode pass only. */
4765 /* BRC distortion data buffer for I frame */
4766 if (!mbenc_i_frame_dist_in_use) {
4767 if (avc_state->mb_disable_skip_map_enable) {
4768 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
4769 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4772 I965_SURFACEFORMAT_R8_UNORM,
4773 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
/* Static frame detection: Gen8 binds an SFD output buffer; otherwise a
 * per-frame-type SFD cost table is bound for non-I frames. */
4775 if (IS_GEN8(i965->intel.device_info)) {
4776 if (avc_state->sfd_enable) {
4777 size = 128 / sizeof(unsigned long);
4778 gpe_resource = &(avc_ctx->res_sfd_output_buffer);
4779 i965_add_buffer_gpe_surface(ctx,
4785 GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
4789 if (avc_state->sfd_enable && generic_state->hme_enabled) {
4790 if (generic_state->frame_type == SLICE_TYPE_P) {
4791 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
4792 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4793 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
4795 if (generic_state->frame_type != SLICE_TYPE_I) {
4797 i965_add_buffer_gpe_surface(ctx,
4803 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
/*
 * Bind all surfaces needed by the FEI (Flexible Encode Infrastructure)
 * MBEnc kernel for the current frame.
 *
 * Inputs come from the application via VAEncMiscParameterFEIFrameControlH264
 * (MB code, MV data, per-MB control, MV predictors, QP map, distortion);
 * when the app supplies a VA buffer it is wrapped as a GPE resource,
 * otherwise the driver allocates its own buffer of the required size.
 * The function then adds the output buffers, the current input Y/UV planes,
 * the VME current/reference picture surfaces, and the auxiliary FEI
 * surfaces to the kernel binding table.
 *
 * NOTE(review): this listing is elided in places (call arguments and some
 * statements are not visible); comments below describe only what is shown.
 */
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815 struct encode_state *encode_state,
4816 struct i965_gpe_context *gpe_context,
4817 struct intel_encoder_context *encoder_context,
4820 struct i965_driver_data *i965 = i965_driver_data(ctx);
4821 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826 struct object_buffer *obj_buffer = NULL;
4827 struct buffer_store *buffer_store = NULL;
4828 struct object_surface *obj_surface = NULL;
4829 struct gen9_surface_avc *avc_priv_surface;
4830 struct i965_gpe_resource *gpe_resource;
4831 VASurfaceID surface_id;
4832 unsigned int size = 0;
4833 unsigned int frame_mb_nums;
4834 int i = 0, allocate_flag = 1;
4836 obj_surface = encode_state->reconstructed_object;
4837 if (!obj_surface || !obj_surface->private_data)
4839 avc_priv_surface = obj_surface->private_data;
/* Total macroblocks in the frame; all per-MB buffer sizes derive from it. */
4841 frame_mb_nums = generic_state->frame_width_in_mbs *
4842 generic_state->frame_height_in_mbs;
4843 fei_param = avc_state->fei_framectl_param;
4845 assert(fei_param != NULL);
4847 /* res_mb_code_surface for MB code */
4848 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
/* Drop any stale resource before re-binding for this frame. */
4849 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851 if (fei_param->mb_code_data != VA_INVALID_ID) {
/* App supplied an MB-code buffer: wrap its BO as a GPE resource. */
4852 obj_buffer = BUFFER(fei_param->mb_code_data);
4853 assert(obj_buffer != NULL);
4854 buffer_store = obj_buffer->buffer_store;
4855 assert(size <= buffer_store->bo->size);
4856 i965_dri_object_to_buffer_gpe_resource(
4857 &avc_priv_surface->res_mb_code_surface,
/* No app buffer: allocate a driver-owned, page-aligned resource. */
4860 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861 &avc_priv_surface->res_mb_code_surface,
4862 ALIGN(size, 0x1000),
4864 assert(allocate_flag != 0);
4867 /* res_mv_data_surface for MV data */
4868 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871 if (fei_param->mv_data != VA_INVALID_ID) {
4872 obj_buffer = BUFFER(fei_param->mv_data);
4873 assert(obj_buffer != NULL);
4874 buffer_store = obj_buffer->buffer_store;
4875 assert(size <= buffer_store->bo->size);
4876 i965_dri_object_to_buffer_gpe_resource(
4877 &avc_priv_surface->res_mv_data_surface,
4880 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881 &avc_priv_surface->res_mv_data_surface,
4882 ALIGN(size, 0x1000),
4884 assert(allocate_flag != 0);
4887 /* fei mb control data surface */
4888 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
/* NOTE(review): bitwise '|' on two flags — works as logical OR here since
 * both operands are 0/1 flags, but '||' would state the intent better. */
4889 if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890 assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891 obj_buffer = BUFFER(fei_param->mb_ctrl);
4892 assert(obj_buffer != NULL);
4893 buffer_store = obj_buffer->buffer_store;
4894 assert(size <= buffer_store->bo->size);
4895 if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897 i965_dri_object_to_buffer_gpe_resource(
4898 &avc_priv_surface->res_fei_mb_cntrl_surface,
4902 /* fei mv predictor surface*/
4903 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904 if (fei_param->mv_predictor_enable &&
4905 (fei_param->mv_predictor != VA_INVALID_ID)) {
4906 obj_buffer = BUFFER(fei_param->mv_predictor);
4907 assert(obj_buffer != NULL);
4908 buffer_store = obj_buffer->buffer_store;
4909 assert(size <= buffer_store->bo->size);
4910 if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911 i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912 i965_dri_object_to_buffer_gpe_resource(
4913 &avc_priv_surface->res_fei_mv_predictor_surface,
/* If prediction is enabled the buffer ID must have been valid. */
4916 if (fei_param->mv_predictor_enable)
4917 assert(fei_param->mv_predictor != VA_INVALID_ID);
4920 /* fei vme distortion */
4921 size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922 if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923 i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924 if (fei_param->distortion != VA_INVALID_ID) {
4925 obj_buffer = BUFFER(fei_param->distortion);
4926 assert(obj_buffer != NULL);
4927 buffer_store = obj_buffer->buffer_store;
4928 assert(size <= buffer_store->bo->size);
4929 i965_dri_object_to_buffer_gpe_resource(
4930 &avc_priv_surface->res_fei_vme_distortion_surface,
4933 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934 &avc_priv_surface->res_fei_vme_distortion_surface,
4935 ALIGN(size, 0x1000),
4936 "fei vme distortion");
4937 assert(allocate_flag != 0);
4941 /* Fixme/Confirm: not sure why we need 3 byte padding here */
4942 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944 obj_buffer = BUFFER(fei_param->qp);
4945 assert(obj_buffer != NULL);
4946 buffer_store = obj_buffer->buffer_store;
/* The app buffer only needs to hold the unpadded QP map. */
4947 assert((size - 3) <= buffer_store->bo->size);
4948 if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950 i965_dri_object_to_buffer_gpe_resource(
4951 &avc_priv_surface->res_fei_mb_qp_surface,
4954 if (fei_param->mb_qp)
4955 assert(fei_param->qp != VA_INVALID_ID);
/* ---- From here on, attach the resources to the kernel binding table ---- */
4958 /*==== pak obj command buffer output ====*/
4959 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961 i965_add_buffer_gpe_surface(ctx,
4967 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4970 /*=== mv data buffer output */
4971 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973 i965_add_buffer_gpe_surface(ctx,
4979 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4982 /* === current input Y (binding table offset = 3)=== */
4983 obj_surface = encode_state->input_yuv_object;
4984 i965_add_2d_gpe_surface(ctx,
4989 I965_SURFACEFORMAT_R8_UNORM,
4990 GEN9_AVC_MBENC_CURR_Y_INDEX);
4992 /* === current input UV === (binding table offset == 4)*/
4993 i965_add_2d_gpe_surface(ctx,
4998 I965_SURFACEFORMAT_R16_UINT,
4999 GEN9_AVC_MBENC_CURR_UV_INDEX);
5001 /* === input current YUV surface, (binding table offset == 15) === */
5002 i965_add_adv_gpe_surface(ctx, gpe_context,
5004 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5007 /*== input current YUV surface, (binding table offset == 32)*/
5008 i965_add_adv_gpe_surface(ctx, gpe_context,
5010 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5012 /* list 0 references */
/* Forward references interleave with the current picture: odd slots. */
5013 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5015 surface_id = slice_param->RefPicList0[i].picture_id;
5016 obj_surface = SURFACE(surface_id);
5017 if (!obj_surface || !obj_surface->private_data)
5019 i965_add_adv_gpe_surface(ctx, gpe_context,
5021 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5025 /* list 1 references */
5026 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027 if (i > 0) break; // only one ref supported here for B frame
5028 surface_id = slice_param->RefPicList1[i].picture_id;
5029 obj_surface = SURFACE(surface_id);
5030 if (!obj_surface || !obj_surface->private_data)
5033 i965_add_adv_gpe_surface(ctx, gpe_context,
5035 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
/* For B frames, also expose the backward ref's MB code / MV data so the
 * kernel can read the co-located motion information. */
5037 avc_priv_surface = obj_surface->private_data;
5038 /* mb code of Backward reference frame */
5039 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040 gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041 i965_add_buffer_gpe_surface(ctx,
5047 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5049 /* mv data of backward ref frame */
5050 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051 gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052 i965_add_buffer_gpe_surface(ctx,
5058 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5062 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063 i965_add_adv_gpe_surface(ctx, gpe_context,
5065 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5069 /* as ref frame ,update later RefPicSelect of Current Picture*/
5070 obj_surface = encode_state->reconstructed_object;
5071 avc_priv_surface = obj_surface->private_data;
5072 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5077 I965_SURFACEFORMAT_R8_UNORM,
5078 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5083 /* mb specific data, macroblock control parameters */
5084 if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085 (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087 gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088 i965_add_buffer_gpe_surface(ctx,
5094 GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5097 /* multi mv predictor surface */
5098 if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099 size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100 gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101 i965_add_buffer_gpe_surface(ctx,
5107 GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5111 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112 size = frame_mb_nums + 3;
/* NOTE(review): trailing comma makes this a comma expression with the call
 * below — behavior is unchanged, but a ';' would be clearer. */
5113 gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114 i965_add_buffer_gpe_surface(ctx,
5120 GEN9_AVC_MBENC_MBQP_INDEX);
5124 /*=== FEI distortion surface ====*/
5125 size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126 gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127 i965_add_buffer_gpe_surface(ctx,
5133 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
/*
 * Run the MBEnc (macroblock encoding) kernel for the current frame, or the
 * BRC I-frame-distortion variant when i_frame_dist_in_use is set.
 *
 * Flow: pick media function + kernel index from kernel_mode / frame type /
 * FEI mode, set up CURBE (unless BRC-update already did), bind surfaces,
 * configure the media-object walker (scoreboard dependencies per frame
 * type), and dispatch. Returns VA_STATUS_SUCCESS.
 *
 * Fix: 'param' was referenced through the mojibake token '¶m' (an
 * encoding corruption of '&param') in three calls; restored to '&param'.
 *
 * NOTE(review): this listing is elided in places (e.g. declarations of
 * kernel_idx/mad_enable are not visible); only visible lines are changed.
 */
5139 gen9_avc_kernel_mbenc(VADriverContextP ctx,
5140 struct encode_state *encode_state,
5141 struct intel_encoder_context *encoder_context,
5142 bool i_frame_dist_in_use)
5144 struct i965_driver_data *i965 = i965_driver_data(ctx);
5145 struct i965_gpe_table *gpe = &i965->gpe_table;
5146 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5147 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5148 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5149 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5150 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5152 struct i965_gpe_context *gpe_context;
5153 struct gpe_media_object_walker_parameter media_object_walker_param;
5154 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5155 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5156 int media_function = 0;
5158 unsigned int mb_const_data_buffer_in_use = 0;
5159 unsigned int mb_qp_buffer_in_use = 0;
5160 unsigned int brc_enabled = 0;
5161 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* '&& (0)' deliberately forces dirty-ROI off; kept as-is. */
5162 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
5163 struct mbenc_param param ;
5165 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
5167 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Decide which optional per-MB input buffers the kernel will consume. */
5169 mb_const_data_buffer_in_use =
5170 generic_state->mb_brc_enabled ||
5173 avc_state->mb_qp_data_enable ||
5174 avc_state->rolling_intra_refresh_enable;
5175 mb_qp_buffer_in_use =
5176 generic_state->mb_brc_enabled ||
5177 generic_state->brc_roi_enable ||
5178 avc_state->mb_qp_data_enable;
5180 if (mbenc_i_frame_dist_in_use) {
/* BRC I-frame distortion runs on the 4x-downscaled picture. */
5181 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
5182 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
5183 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
5184 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
5188 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
/* Select kernel variant by quality/performance preset. */
5190 switch (generic_state->kernel_mode) {
5191 case INTEL_ENC_KERNEL_NORMAL : {
5192 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5193 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
5196 case INTEL_ENC_KERNEL_PERFORMANCE : {
5197 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
5198 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
5201 case INTEL_ENC_KERNEL_QUALITY : {
5202 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
5203 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* FEI mode overrides the preset-based kernel choice. */
5211 if (encoder_context->fei_enabled) {
5212 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5213 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
5216 if (generic_state->frame_type == SLICE_TYPE_P) {
5218 } else if (generic_state->frame_type == SLICE_TYPE_B) {
/* Full-encode path works at native resolution. */
5222 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
5223 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
5224 mad_enable = avc_state->mad_enable;
5225 brc_enabled = generic_state->brc_enabled;
5227 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
5230 memset(&param, 0, sizeof(struct mbenc_param));
5232 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
5233 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
5234 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
5235 param.mad_enable = mad_enable;
5236 param.brc_enabled = brc_enabled;
5237 param.roi_enabled = roi_enable;
5239 if (avc_state->mb_status_supported) {
5240 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
/* Skip CURBE/context setup when the BRC-update kernel already wrote it. */
5243 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5244 gpe->context_init(ctx, gpe_context);
5247 gpe->reset_binding_table(ctx, gpe_context);
5249 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5251 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
5254 /* MB brc const data buffer set up*/
5255 if (mb_const_data_buffer_in_use) {
5256 // calculate the lambda table, it is kernel controlled trellis quantization, gen95+
5257 if (avc_state->lambda_table_enable)
5258 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
5260 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
5263 /*clear the mad buffer*/
5265 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
5268 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5270 gpe->setup_interface_data(ctx, gpe_context);
5273 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5275 kernel_walker_param.use_scoreboard = 1;
5276 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5277 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5278 if (mbenc_i_frame_dist_in_use) {
/* Distortion pass has no inter-MB dependency. */
5279 kernel_walker_param.no_dependency = 1;
/* Walker angle encodes the MB dependency pattern per frame type. */
5281 switch (generic_state->frame_type) {
5283 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5286 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5289 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5290 if (!slice_param->direct_spatial_mv_pred_flag) {
/* Temporal direct mode needs the stricter 45-degree pattern. */
5291 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5297 kernel_walker_param.no_dependency = 0;
5300 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5302 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5305 &media_object_walker_param);
5306 return VA_STATUS_SUCCESS;
5310 me kernel related function
/*
 * Fill the CURBE (constant URB entry) for the HME (hierarchical motion
 * estimation) kernel at one pyramid level (4x, 16x or 32x downscale).
 *
 * Per-level knobs: whether to seed from the previous (coarser) level's MVs,
 * whether distortions are written out (4x only), and the MV shift /
 * read-position factors. B frames switch to the B-frame ME method table
 * and set bi-prediction weight.
 *
 * NOTE(review): this listing is elided in places (scale_factor assignment
 * per case is not visible); comments describe only visible lines.
 */
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314 struct encode_state *encode_state,
5315 struct i965_gpe_context *gpe_context,
5316 struct intel_encoder_context *encoder_context,
5319 gen9_avc_me_curbe_data *curbe_cmd;
5320 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5324 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5326 struct me_param * curbe_param = (struct me_param *)param ;
5327 unsigned char use_mv_from_prev_step = 0;
5328 unsigned char write_distortions = 0;
5329 unsigned char qp_prime_y = 0;
5330 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
/* NOTE(review): 'seach_table_idx' is a typo for 'search_table_idx';
 * kept as-is (byte-identical rewrite). 0 = P table, 1 = B table. */
5331 unsigned char seach_table_idx = 0;
5332 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334 unsigned int scale_factor = 0;
/* Effective slice QP = picture init QP + slice delta. */
5336 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5337 switch (curbe_param->hme_type) {
5338 case INTEL_ENC_HME_4x : {
/* Seed from 16x results when the 16x level is enabled. */
5339 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340 write_distortions = 1;
5341 mv_shift_factor = 2;
5343 prev_mv_read_pos_factor = 0;
5346 case INTEL_ENC_HME_16x : {
5347 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348 write_distortions = 0;
5349 mv_shift_factor = 2;
5351 prev_mv_read_pos_factor = 1;
5354 case INTEL_ENC_HME_32x : {
/* Coarsest level: nothing to seed from. */
5355 use_mv_from_prev_step = 0;
5356 write_distortions = 0;
5357 mv_shift_factor = 1;
5359 prev_mv_read_pos_factor = 0;
5366 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* Downscaled dimensions in macroblocks, rounded up to MB granularity. */
5371 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
/* Start from the static template, then patch per-frame fields. */
5374 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5376 curbe_cmd->dw3.sub_pel_mode = 3;
5377 if (avc_state->field_scaling_output_interleaved) {
5378 /*frame set to zero,field specified*/
5379 curbe_cmd->dw3.src_access = 0;
5380 curbe_cmd->dw3.ref_access = 0;
5381 curbe_cmd->dw7.src_field_polarity = 0;
5383 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5387 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388 curbe_cmd->dw6.write_distortions = write_distortions;
5389 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5392 if (generic_state->frame_type == SLICE_TYPE_B) {
/* 32 = equal bi-prediction weighting. */
5393 curbe_cmd->dw1.bi_weight = 32;
5394 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395 me_method = gen9_avc_b_me_method[generic_state->preset];
5396 seach_table_idx = 1;
5399 if (generic_state->frame_type == SLICE_TYPE_P ||
5400 generic_state->frame_type == SLICE_TYPE_B)
5401 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5403 curbe_cmd->dw13.ref_streamin_cost = 5;
5404 curbe_cmd->dw13.roi_enable = 0;
5406 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Copy the 14-dword search path for the chosen table/method into dw16+. */
5409 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table indices the kernel uses to address its surfaces. */
5411 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5419 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind input/output surfaces for the HME kernel at the requested pyramid
 * level (4x / 16x / 32x). Each case binds: the MV-data output for this
 * level, the next coarser level's MV data as seed input (when enabled),
 * the current downscaled picture, and the downscaled L0/L1 reference
 * pictures. The 4x level additionally binds the BRC and ME distortion
 * output surfaces. All scaled pictures live on the reconstructed_object's
 * private data.
 *
 * NOTE(review): this listing is elided in places; comments describe only
 * visible lines.
 */
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425 struct encode_state *encode_state,
5426 struct i965_gpe_context *gpe_context,
5427 struct intel_encoder_context *encoder_context,
5430 struct i965_driver_data *i965 = i965_driver_data(ctx);
5432 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5437 struct object_surface *obj_surface, *input_surface;
5438 struct gen9_surface_avc *avc_priv_surface;
5439 struct i965_gpe_resource *gpe_resource;
5440 struct me_param * curbe_param = (struct me_param *)param ;
5442 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443 VASurfaceID surface_id;
5446 /* all scaled input surface stored in reconstructed_object*/
5447 obj_surface = encode_state->reconstructed_object;
5448 if (!obj_surface || !obj_surface->private_data)
5450 avc_priv_surface = obj_surface->private_data;
5453 switch (curbe_param->hme_type) {
5454 case INTEL_ENC_HME_4x : {
/* 4x MV data output. */
5456 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5460 I965_SURFACEFORMAT_R8_UNORM,
5461 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 16x MV data as seed input when the 16x level ran. */
5464 if (generic_state->b16xme_enabled) {
5465 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5469 I965_SURFACEFORMAT_R8_UNORM,
5470 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5472 /* brc distortion output*/
5473 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5477 I965_SURFACEFORMAT_R8_UNORM,
5478 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479 /* memv distortion output*/
5480 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5484 I965_SURFACEFORMAT_R8_UNORM,
5485 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486 /*input current down scaled YUV surface*/
5487 obj_surface = encode_state->reconstructed_object;
5488 avc_priv_surface = obj_surface->private_data;
5489 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490 i965_add_adv_gpe_surface(ctx, gpe_context,
5492 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493 /*input ref scaled YUV surface*/
5494 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495 surface_id = slice_param->RefPicList0[i].picture_id;
5496 obj_surface = SURFACE(surface_id);
5497 if (!obj_surface || !obj_surface->private_data)
5499 avc_priv_surface = obj_surface->private_data;
5501 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5503 i965_add_adv_gpe_surface(ctx, gpe_context,
5505 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
/* Current picture again as the base for backward (L1) references. */
5508 obj_surface = encode_state->reconstructed_object;
5509 avc_priv_surface = obj_surface->private_data;
5510 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5512 i965_add_adv_gpe_surface(ctx, gpe_context,
5514 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5516 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517 surface_id = slice_param->RefPicList1[i].picture_id;
5518 obj_surface = SURFACE(surface_id);
5519 if (!obj_surface || !obj_surface->private_data)
5521 avc_priv_surface = obj_surface->private_data;
5523 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5525 i965_add_adv_gpe_surface(ctx, gpe_context,
5527 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5532 case INTEL_ENC_HME_16x : {
/* Same binding pattern as 4x, using the 16x-scaled surfaces; seeds from
 * 32x when that level ran. No distortion outputs at this level. */
5533 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5537 I965_SURFACEFORMAT_R8_UNORM,
5538 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5540 if (generic_state->b32xme_enabled) {
5541 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5545 I965_SURFACEFORMAT_R8_UNORM,
5546 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5549 obj_surface = encode_state->reconstructed_object;
5550 avc_priv_surface = obj_surface->private_data;
5551 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552 i965_add_adv_gpe_surface(ctx, gpe_context,
5554 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5556 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557 surface_id = slice_param->RefPicList0[i].picture_id;
5558 obj_surface = SURFACE(surface_id);
5559 if (!obj_surface || !obj_surface->private_data)
5561 avc_priv_surface = obj_surface->private_data;
5563 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5565 i965_add_adv_gpe_surface(ctx, gpe_context,
5567 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5570 obj_surface = encode_state->reconstructed_object;
5571 avc_priv_surface = obj_surface->private_data;
5572 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5574 i965_add_adv_gpe_surface(ctx, gpe_context,
5576 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5578 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579 surface_id = slice_param->RefPicList1[i].picture_id;
5580 obj_surface = SURFACE(surface_id);
5581 if (!obj_surface || !obj_surface->private_data)
5583 avc_priv_surface = obj_surface->private_data;
5585 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5587 i965_add_adv_gpe_surface(ctx, gpe_context,
5589 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5593 case INTEL_ENC_HME_32x : {
/* Coarsest level: only its own MV output plus current/reference 32x
 * pictures; nothing to seed from. */
5594 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5598 I965_SURFACEFORMAT_R8_UNORM,
5599 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5601 obj_surface = encode_state->reconstructed_object;
5602 avc_priv_surface = obj_surface->private_data;
5603 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604 i965_add_adv_gpe_surface(ctx, gpe_context,
5606 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5608 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609 surface_id = slice_param->RefPicList0[i].picture_id;
5610 obj_surface = SURFACE(surface_id);
5611 if (!obj_surface || !obj_surface->private_data)
5613 avc_priv_surface = obj_surface->private_data;
5615 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5617 i965_add_adv_gpe_surface(ctx, gpe_context,
5619 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5622 obj_surface = encode_state->reconstructed_object;
5623 avc_priv_surface = obj_surface->private_data;
5624 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5626 i965_add_adv_gpe_surface(ctx, gpe_context,
5628 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5630 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631 surface_id = slice_param->RefPicList1[i].picture_id;
5632 obj_surface = SURFACE(surface_id);
5633 if (!obj_surface || !obj_surface->private_data)
5635 avc_priv_surface = obj_surface->private_data;
5637 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5639 i965_add_adv_gpe_surface(ctx, gpe_context,
5641 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/*
 * Dispatch one HME (hierarchical motion estimation) pass at the given
 * pyramid level: pick the media function per hme_type, select the P- or
 * B-frame ME kernel (I frames never reach this path), set CURBE, bind
 * surfaces, and launch the media-object walker with no inter-MB
 * dependency. Returns VA_STATUS_SUCCESS.
 *
 * Fix: 'param' was referenced through the mojibake token '¶m' (an
 * encoding corruption of '&param') in three calls; restored to '&param'.
 *
 * NOTE(review): this listing is elided in places (e.g. the switch header
 * and scale_factor assignments are not visible); only visible lines are
 * changed.
 */
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653 struct encode_state *encode_state,
5654 struct intel_encoder_context *encoder_context,
5657 struct i965_driver_data *i965 = i965_driver_data(ctx);
5658 struct i965_gpe_table *gpe = &i965->gpe_table;
5659 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5664 struct i965_gpe_context *gpe_context;
5665 struct gpe_media_object_walker_parameter media_object_walker_param;
5666 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668 int media_function = 0;
5670 struct me_param param ;
5671 unsigned int scale_factor = 0;
5674 case INTEL_ENC_HME_4x : {
5675 media_function = INTEL_MEDIA_STATE_4X_ME;
5679 case INTEL_ENC_HME_16x : {
5680 media_function = INTEL_MEDIA_STATE_16X_ME;
5684 case INTEL_ENC_HME_32x : {
5685 media_function = INTEL_MEDIA_STATE_32X_ME;
/* Walker resolution in downscaled macroblocks. */
5694 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5697 /* I frame should not come here.*/
5698 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5701 gpe->context_init(ctx, gpe_context);
5702 gpe->reset_binding_table(ctx, gpe_context);
5705 memset(&param, 0, sizeof(param));
5706 param.hme_type = hme_type;
5707 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5710 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5712 gpe->setup_interface_data(ctx, gpe_context);
5714 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715 /* the scaling is based on 8x8 blk level */
5716 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
/* ME has no inter-MB dependency; run fully parallel. */
5718 kernel_walker_param.no_dependency = 1;
5720 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5722 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5725 &media_object_walker_param);
5727 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the weighted-prediction (WP) kernel: default luma
 * weight/offset come from L1[0] or L0[0] of the first slice depending on
 * curbe_param->ref_list_idx, and the input/output binding-table indices
 * are recorded for the kernel.
 *
 * NOTE(review): this listing is elided in places; comments describe only
 * visible lines.
 */
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735 struct encode_state *encode_state,
5736 struct i965_gpe_context *gpe_context,
5737 struct intel_encoder_context *encoder_context,
5740 gen9_avc_wp_curbe_data *cmd;
5741 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744 struct wp_param * curbe_param = (struct wp_param *)param;
5746 cmd = i965_gpe_context_map_curbe(gpe_context);
5750 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
/* Non-zero ref_list_idx selects the L1 (backward) list. */
5751 if (curbe_param->ref_list_idx) {
5752 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5755 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5759 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5762 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind surfaces for the weighted-prediction kernel: pick the first
 * reference from L0 or L1 (per curbe_param->ref_list_idx), record whether
 * a usable reference exists in avc_state->weighted_ref_l{0,1}_enable, then
 * bind the reference picture as input and the per-list WP output surface.
 *
 * NOTE(review): this listing is elided in places (fallback assignments
 * between the checks are not visible); comments describe only visible
 * lines.
 */
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768 struct encode_state *encode_state,
5769 struct i965_gpe_context *gpe_context,
5770 struct intel_encoder_context *encoder_context,
5773 struct i965_driver_data *i965 = i965_driver_data(ctx);
5774 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777 struct wp_param * curbe_param = (struct wp_param *)param;
5778 struct object_surface *obj_surface;
5779 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780 VASurfaceID surface_id;
5782 if (curbe_param->ref_list_idx) {
/* L1: validate the first backward reference and flag availability. */
5783 surface_id = slice_param->RefPicList1[0].picture_id;
5784 obj_surface = SURFACE(surface_id);
5785 if (!obj_surface || !obj_surface->private_data)
5786 avc_state->weighted_ref_l1_enable = 0;
5788 avc_state->weighted_ref_l1_enable = 1;
/* L0: validate the first forward reference and flag availability. */
5790 surface_id = slice_param->RefPicList0[0].picture_id;
5791 obj_surface = SURFACE(surface_id);
5792 if (!obj_surface || !obj_surface->private_data)
5793 avc_state->weighted_ref_l0_enable = 0;
5795 avc_state->weighted_ref_l0_enable = 1;
/* NOTE(review): falls back to reference_objects[0] — presumably when the
 * list entry was unusable; the guarding condition is elided here. */
5798 obj_surface = encode_state->reference_objects[0];
5801 i965_add_adv_gpe_surface(ctx, gpe_context,
5803 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5805 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806 i965_add_adv_gpe_surface(ctx, gpe_context,
5808 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814 struct encode_state *encode_state,
5815 struct intel_encoder_context *encoder_context,
5816 unsigned int list1_in_use)
5818 struct i965_driver_data *i965 = i965_driver_data(ctx);
5819 struct i965_gpe_table *gpe = &i965->gpe_table;
5820 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5825 struct i965_gpe_context *gpe_context;
5826 struct gpe_media_object_walker_parameter media_object_walker_param;
5827 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828 int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829 struct wp_param param;
5831 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5833 gpe->context_init(ctx, gpe_context);
5834 gpe->reset_binding_table(ctx, gpe_context);
5836 memset(¶m, 0, sizeof(param));
5837 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5839 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m);
5842 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5844 gpe->setup_interface_data(ctx, gpe_context);
5846 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847 /* the scaling is based on 8x8 blk level */
5848 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850 kernel_walker_param.no_dependency = 1;
5852 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5854 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5857 &media_object_walker_param);
5859 return VA_STATUS_SUCCESS;
5864 SFD (static frame detection) related functions
/* Fill the CURBE for the static frame detection (SFD) kernel: thresholds
 * for large/zero motion vectors, slice/BRC mode flags, a per-slice-type
 * cost table, and the kernel's binding-table indices.
 * NOTE(review): lines are elided in this listing (return type, braces,
 * NULL check on the mapped CURBE are not visible).
 */
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868 struct encode_state *encode_state,
5869 struct i965_gpe_context *gpe_context,
5870 struct intel_encoder_context *encoder_context,
5873 gen9_avc_sfd_curbe_data *cmd;
5874 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5879 cmd = i965_gpe_context_map_curbe(gpe_context);
5883 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5885 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887 cmd->dw0.stream_in_type = 7 ;
5888 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
5889 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890 cmd->dw0.vdenc_mode_disable = 1 ;
5892 cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5896 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
/* Motion thresholds: totals are expressed as a percentage of the MB count. */
5899 cmd->dw3.large_mv_threshold = 128 ;
5900 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901 cmd->dw5.zmv_threshold = 4 ;
5902 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903 cmd->dw7.min_dist_threshold = 10 ;
/* Per-slice-type cost table; I frames get no table in the visible code. */
5905 if (generic_state->frame_type == SLICE_TYPE_P) {
5906 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5908 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5912 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
/* Binding-table indices consumed by the SFD kernel. */
5914 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5920 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces used by the SFD kernel: the 4x HME MV data and
 * distortion surfaces as inputs and the SFD output buffer.
 * NOTE(review): call-argument lines (gpe_resource, sizes, flags) are
 * elided in this listing.
 */
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926 struct encode_state *encode_state,
5927 struct i965_gpe_context *gpe_context,
5928 struct intel_encoder_context *encoder_context,
5931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933 struct i965_gpe_resource *gpe_resource;
5936 /*HME mv data surface memv output 4x*/
5937 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5941 I965_SURFACEFORMAT_R8_UNORM,
5942 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5944 /* memv distortion */
5945 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5949 I965_SURFACEFORMAT_R8_UNORM,
5950 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
/* SFD result buffer (kernel output). */
5953 gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954 i965_add_buffer_gpe_surface(ctx,
5960 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
/* Dispatch the static frame detection kernel as a single media object
 * (no walker: one thread, zeroed inline data).
 * NOTE(review): braces and the gpe_context/media_function arguments of the
 * final call are elided in this listing.
 */
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966 struct encode_state *encode_state,
5967 struct intel_encoder_context *encoder_context)
5969 struct i965_driver_data *i965 = i965_driver_data(ctx);
5970 struct i965_gpe_table *gpe = &i965->gpe_table;
5971 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5975 struct i965_gpe_context *gpe_context;
5976 struct gpe_media_object_parameter media_object_param;
5977 struct gpe_media_object_inline_data media_object_inline_data;
5978 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
/* (Re)initialize GPE context and binding table for this dispatch. */
5981 gpe->context_init(ctx, gpe_context);
5982 gpe->reset_binding_table(ctx, gpe_context);
/* CURBE and surfaces via the generic-context callbacks; no extra param. */
5985 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5988 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5990 gpe->setup_interface_data(ctx, gpe_context);
5992 memset(&media_object_param, 0, sizeof(media_object_param));
5993 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994 media_object_param.pinline_data = &media_object_inline_data;
5995 media_object_param.inline_size = sizeof(media_object_inline_data);
5997 gen9_avc_run_kernel_media_object(ctx, encoder_context,
6000 &media_object_param);
6002 return VA_STATUS_SUCCESS;
6005 /**************** PreEnc Scaling *************************************/
6006 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6007 * function to set preenc scaling curbe is the same one using for avc encode
6008 == gen95_avc_set_curbe_scaling4x()
6009 * function to send buffer/surface resources is the same one using for avc encode
6010 == gen9_avc_send_surface_scaling()
/* Run the PreEnc 4x downscaling kernel for one surface.
 * scale_surface_type selects the current input picture, the past reference
 * or the future reference; MB-level statistics output is enabled according
 * to the VAStatsStatisticsParameterH264 settings.
 * NOTE(review): case labels for the current-picture path, `else`/`break`
 * lines and the `kernel_idx` declaration are elided in this listing.
 */
6013 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6014 struct encode_state *encode_state,
6015 struct intel_encoder_context *encoder_context,
6017 int scale_surface_type)
6019 struct i965_driver_data *i965 = i965_driver_data(ctx);
6020 struct i965_gpe_table *gpe = &i965->gpe_table;
6021 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6022 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6023 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6024 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6025 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6026 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6027 VAStatsStatisticsParameter *stat_param = NULL;
6028 struct i965_gpe_context *gpe_context;
6029 struct scaling_param surface_param;
6030 struct object_surface *obj_surface = NULL;
6031 struct gpe_media_object_walker_parameter media_object_walker_param;
6032 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6033 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6034 int media_function = 0;
6036 int enable_statistics_output;
6038 stat_param_h264 = avc_state->stat_param;
6039 assert(stat_param_h264);
6040 stat_param = &stat_param_h264->stats_params;
6041 enable_statistics_output = !stat_param_h264->disable_statistics_output;
/* PreEnc always uses the 4x scaling kernel. */
6043 memset(&surface_param, 0, sizeof(struct scaling_param));
6044 media_function = INTEL_MEDIA_STATE_4X_SCALING;
6045 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6046 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6047 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6049 surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6050 surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6051 surface_param.output_frame_width = generic_state->frame_width_4x;
6052 surface_param.output_frame_height = generic_state->frame_height_4x;
6053 surface_param.use_4x_scaling = 1 ;
6054 surface_param.use_16x_scaling = 0 ;
6055 surface_param.use_32x_scaling = 0 ;
6056 surface_param.enable_mb_flatness_check = enable_statistics_output;
6057 surface_param.enable_mb_variance_output = enable_statistics_output;
6058 surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6059 surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
/* Pick input/output surfaces and the statistics buffer per surface type. */
6061 switch (scale_surface_type) {
6064 surface_param.input_surface = encode_state->input_yuv_object ;
6065 surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6067 if (enable_statistics_output) {
6068 surface_param.pres_mbv_proc_stat_buffer =
6069 &avc_ctx->preproc_stat_data_out_buffer;
6070 surface_param.mbv_proc_stat_enabled = 1;
6072 surface_param.mbv_proc_stat_enabled = 0;
6073 surface_param.pres_mbv_proc_stat_buffer = NULL;
6077 case SCALE_PAST_REF_PIC:
6078 obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6079 assert(obj_surface);
6080 surface_param.input_surface = obj_surface;
6081 surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6083 if (stat_param->past_ref_stat_buf) {
6084 surface_param.pres_mbv_proc_stat_buffer =
6085 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6086 surface_param.mbv_proc_stat_enabled = 1;
6088 surface_param.mbv_proc_stat_enabled = 0;
6089 surface_param.pres_mbv_proc_stat_buffer = NULL;
6093 case SCALE_FUTURE_REF_PIC:
6095 obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6096 assert(obj_surface);
6097 surface_param.input_surface = obj_surface;
6098 surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6100 if (stat_param->future_ref_stat_buf) {
6101 surface_param.pres_mbv_proc_stat_buffer =
6102 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6103 surface_param.mbv_proc_stat_enabled = 1;
6105 surface_param.mbv_proc_stat_enabled = 0;
6106 surface_param.pres_mbv_proc_stat_buffer = NULL;
6113 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6115 gpe->context_init(ctx, gpe_context);
6116 gpe->reset_binding_table(ctx, gpe_context);
6118 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* Output format selection happens after CURBE setup, before surface binding. */
6120 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6121 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6123 /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6124 * VAStatsStatisticsH264 will be used to store the output. */
6125 surface_param.enable_mb_flatness_check = 0;
6126 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6128 /* setup the interface data */
6129 gpe->setup_interface_data(ctx, gpe_context);
6131 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6132 /* the scaling is based on 8x8 blk level */
6133 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6134 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6135 kernel_walker_param.no_dependency = 1;
6137 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6139 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6142 &media_object_walker_param);
6144 return VA_STATUS_SUCCESS;
6147 /**************** PreEnc HME *************************************/
6148 /* function to run preenc hme is the same one we using in avc encode:
6149 == gen9_avc_kernel_me()
6150 * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
6151 * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
/* Fill the CURBE for the PreEnc HME (4x motion estimation) kernel.
 * Starts from gen9_avc_me_curbe_init_data and adjusts it for the frame
 * type, QP and search-path table.
 * NOTE(review): only the INTEL_ENC_HME_4x case body is visible; other
 * hme_type cases, the scale_factor assignment, braces and NULL checks are
 * elided in this listing.
 */
6154 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6155 struct encode_state *encode_state,
6156 struct i965_gpe_context *gpe_context,
6157 struct intel_encoder_context *encoder_context,
6160 gen9_avc_fei_me_curbe_data *curbe_cmd;
6161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6162 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6163 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6164 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6165 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6167 struct me_param * curbe_param = (struct me_param *)param ;
6168 unsigned char use_mv_from_prev_step = 0;
6169 unsigned char write_distortions = 0;
6170 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6171 unsigned char seach_table_idx = 0;
6172 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6173 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6174 unsigned int scale_factor = 0;
/* Per-HME-level parameters; 4x is the only level PreEnc uses. */
6176 switch (curbe_param->hme_type) {
6177 case INTEL_ENC_HME_4x:
6178 use_mv_from_prev_step = 0;
6179 write_distortions = 0;
6180 mv_shift_factor = 2;
6182 prev_mv_read_pos_factor = 0;
6189 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* Downscaled dimensions in MBs, rounded up to a 16-pixel boundary. */
6193 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6194 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6196 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6198 curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6199 if (avc_state->field_scaling_output_interleaved) {
6200 /*frame set to zero,field specified*/
6201 curbe_cmd->dw3.src_access = 0;
6202 curbe_cmd->dw3.ref_access = 0;
6203 curbe_cmd->dw7.src_field_polarity = 0;
6205 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6206 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6207 curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6209 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6210 curbe_cmd->dw6.write_distortions = write_distortions;
6211 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6212 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
/* B frames switch to the bidirectional ME method and search table. */
6214 if (generic_state->frame_type == SLICE_TYPE_B) {
6215 curbe_cmd->dw1.bi_weight = 32;
6216 curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6217 me_method = gen9_avc_b_me_method[generic_state->preset];
6218 seach_table_idx = 1;
6221 if (generic_state->frame_type == SLICE_TYPE_P ||
6222 generic_state->frame_type == SLICE_TYPE_B)
6223 curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6225 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6226 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6228 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table indices consumed by the ME kernel. */
6230 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6231 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6232 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6233 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6234 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6235 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6236 curbe_cmd->dw38.reserved = 0;
6238 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces for the PreEnc 4x HME kernel: MV data, distortion and
 * BRC distortion outputs, plus interleaved current/reference scaled YUV
 * inputs for each past (forward) and future (backward) reference.
 * NOTE(review): the declaration of loop index `i`, call arguments and
 * closing braces are elided in this listing.
 */
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244 struct encode_state *encode_state,
6245 struct i965_gpe_context *gpe_context,
6246 struct intel_encoder_context *encoder_context,
6249 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254 struct object_surface *input_surface;
6255 struct i965_gpe_resource *gpe_resource;
6256 struct me_param * curbe_param = (struct me_param *)param ;
6259 /* PreEnc Only supports 4xme */
6260 assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6262 switch (curbe_param->hme_type) {
6263 case INTEL_ENC_HME_4x : {
6265 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6269 I965_SURFACEFORMAT_R8_UNORM,
6270 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6272 /* memv distortion output*/
6273 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6277 I965_SURFACEFORMAT_R8_UNORM,
6278 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6280 /* brc distortion output*/
6281 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6285 I965_SURFACEFORMAT_R8_UNORM,
6286 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6288 /* input past ref scaled YUV surface*/
6289 for (i = 0; i < stat_param->num_past_references; i++) {
6290 /*input current down scaled YUV surface for forward ref */
6291 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292 i965_add_adv_gpe_surface(ctx, gpe_context,
6294 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6296 input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297 i965_add_adv_gpe_surface(ctx, gpe_context,
6299 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6302 /* input future ref scaled YUV surface*/
6303 for (i = 0; i < stat_param->num_future_references; i++) {
6304 /*input current down scaled YUV surface for backward ref */
6305 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306 i965_add_adv_gpe_surface(ctx, gpe_context,
6308 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6310 input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311 i965_add_adv_gpe_surface(ctx, gpe_context,
6313 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6324 /**************** PreEnc PreProc *************************************/
6325 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6326 * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6327 * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331 struct encode_state *encode_state,
6332 struct i965_gpe_context *gpe_context,
6333 struct intel_encoder_context *encoder_context,
6336 gen9_avc_preproc_curbe_data *cmd;
6337 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342 unsigned char me_method = 0;
6343 unsigned int table_idx = 0;
6344 int ref_width, ref_height, len_sp;
6345 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347 unsigned int preset = generic_state->preset;
6349 cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6352 memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6354 switch (generic_state->frame_type) {
6356 memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357 sizeof(gen9_avc_preproc_curbe_data));
6360 memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361 sizeof(gen9_avc_preproc_curbe_data));
6364 memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365 sizeof(gen9_avc_preproc_curbe_data));
6370 /* 4 means full search, 6 means diamand search */
6371 me_method = (stat_param_h264->search_window == 5) ||
6372 (stat_param_h264->search_window == 8) ? 4 : 6;
6374 ref_width = stat_param_h264->ref_width;
6375 ref_height = stat_param_h264->ref_height;
6376 len_sp = stat_param_h264->len_sp;
6377 /* If there is a serch_window, discard user provided ref_width, ref_height
6378 * and search_path length */
6379 switch (stat_param_h264->search_window) {
6381 /* not use predefined search window, there should be a search_path input */
6382 if ((stat_param_h264->search_path != 0) &&
6383 (stat_param_h264->search_path != 1) &&
6384 (stat_param_h264->search_path != 2)) {
6385 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
6388 /* 4 means full search, 6 means diamand search */
6389 me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391 WARN_ONCE("Invalid input ref_width/ref_height in"
6392 "SearchWindow=0 case! \n");
6398 /* Tiny - 4 SUs 24x24 window */
6405 /* Small - 9 SUs 28x28 window */
6411 /* Diamond - 16 SUs 48x40 window */
6417 /* Large Diamond - 32 SUs 48x40 window */
6423 /* Exhaustive - 48 SUs 48x40 window */
6429 /* Diamond - 16 SUs 64x32 window */
6435 /* Large Diamond - 32 SUs 64x32 window */
6441 /* Exhaustive - 48 SUs 64x32 window */
6451 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6453 CLIP(ref_width, 4, 32);
6454 CLIP(ref_height, 4, 32);
6455 } else if (is_pframe) {
6456 CLIP(ref_width, 4, 64);
6457 CLIP(ref_height, 4, 32);
6460 cmd->dw0.adaptive_enable =
6461 cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462 cmd->dw2.max_len_sp = len_sp;
6463 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465 cmd->dw3.src_access =
6466 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6468 if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469 cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6471 cmd->dw3.ft_enable = 0;
6473 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474 cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475 cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476 cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477 cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478 cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479 cmd->dw4.hme_enable = generic_state->hme_enabled;
6480 cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481 cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6483 cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484 (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6486 cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487 cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6489 cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490 cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6492 cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6495 memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6497 /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498 memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6500 /* search path tables */
6501 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6504 if (stat_param_h264->intra_part_mask == 0x07)
6505 cmd->dw31.intra_compute_type = 3;
6507 cmd->dw38.ref_threshold = 400;
6508 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6510 if (generic_state->frame_type == SLICE_TYPE_I) {
6511 cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512 cmd->dw36.hme_combine_overlap = 0;
6513 } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515 cmd->dw3.bme_disable_fbr = 1;
6516 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518 cmd->dw7.non_skip_zmv_added = 1;
6519 cmd->dw7.non_skip_mode_added = 1;
6520 cmd->dw7.skip_center_mask = 1;
6521 cmd->dw32.max_vmv_r =
6522 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523 cmd->dw36.hme_combine_overlap = 1;
6525 } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6527 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528 cmd->dw3.search_ctrl = 0;
6529 cmd->dw3.skip_type = 1;
6530 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532 cmd->dw7.skip_center_mask = 0xff;
6533 cmd->dw32.max_vmv_r =
6534 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535 cmd->dw36.hme_combine_overlap = 1;
6538 cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539 cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540 cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541 cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542 cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543 cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544 cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545 cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546 cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6548 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces for the PreEnc preprocessing kernel: the input YUV
 * (Y and UV planes), the HME MV data, optional MV-predictor and MB-QP
 * inputs, the MV/statistics outputs, and the VME current/forward/backward
 * reference pictures.
 * NOTE(review): call-argument lines (pitch/offset/size/flags) and several
 * braces are elided in this listing.
 */
6552 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6553 struct encode_state *encode_state,
6554 struct i965_gpe_context *gpe_context,
6555 struct intel_encoder_context *encoder_context,
6558 struct i965_driver_data *i965 = i965_driver_data(ctx);
6559 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6560 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6561 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6562 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6563 struct object_surface *obj_surface;
6564 struct i965_gpe_resource *gpe_resource;
6565 VASurfaceID surface_id;
6566 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6567 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6568 unsigned int size = 0, frame_mb_nums = 0;
6570 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6572 /* input yuv surface, Y index */
6573 obj_surface = encode_state->input_yuv_object;
6574 i965_add_2d_gpe_surface(ctx,
6579 I965_SURFACEFORMAT_R8_UNORM,
6580 GEN9_AVC_PREPROC_CURR_Y_INDEX);
6582 /* input yuv surface, UV index */
6583 i965_add_2d_gpe_surface(ctx,
6588 I965_SURFACEFORMAT_R16_UINT,
6589 GEN9_AVC_MBENC_CURR_UV_INDEX);
6592 if (generic_state->hme_enabled) {
6593 /* HME mv data buffer */
6594 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6598 I965_SURFACEFORMAT_R8_UNORM,
6599 GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6602 /* mv predictor buffer */
6603 if (stat_param_h264->mv_predictor_ctrl) {
6604 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6605 gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6606 i965_add_buffer_gpe_surface(ctx,
6612 GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
/* Per-MB QP input plus the FTQ lookup table (MBBRC constant data). */
6616 if (stat_param_h264->mb_qp) {
6617 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6618 gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6619 i965_add_buffer_gpe_surface(ctx,
6625 GEN9_AVC_PREPROC_MBQP_INDEX);
6627 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6628 size = 16 * AVC_QP_MAX * 4;
6629 i965_add_buffer_gpe_surface(ctx,
6635 GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6639 /* mv data output buffer */
6640 if (!stat_param_h264->disable_mv_output) {
6641 gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6642 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6643 i965_add_buffer_gpe_surface(ctx,
6649 GEN9_AVC_PREPROC_MV_DATA_INDEX);
6652 /* statistics output buffer */
6653 if (!stat_param_h264->disable_statistics_output) {
6654 gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6655 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6656 i965_add_buffer_gpe_surface(ctx,
6662 GEN9_AVC_PREPROC_MB_STATS_INDEX);
/* VME current picture Y, bound twice at consecutive BTI slots. */
6666 obj_surface = encode_state->input_yuv_object;
6667 i965_add_2d_gpe_surface(ctx,
6672 I965_SURFACEFORMAT_R8_UNORM,
6673 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6675 /* vme cur pic y (repeating based on required BTI order for the media kernel)*/
6676 obj_surface = encode_state->input_yuv_object;
6677 i965_add_2d_gpe_surface(ctx,
6682 I965_SURFACEFORMAT_R8_UNORM,
6683 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6685 /* vme forward ref */
6686 /* Only supports one past ref */
6687 if (stat_param->num_past_references > 0) {
6688 surface_id = stat_param->past_references[0].picture_id;
6689 assert(surface_id != VA_INVALID_ID);
6690 obj_surface = SURFACE(surface_id);
6693 i965_add_adv_gpe_surface(ctx, gpe_context,
6695 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6699 /* vme future ref */
6700 /* Only supports one future ref */
6701 if (stat_param->num_future_references > 0) {
6702 surface_id = stat_param->future_references[0].picture_id;
6703 assert(surface_id != VA_INVALID_ID);
6704 obj_surface = SURFACE(surface_id);
6707 i965_add_adv_gpe_surface(ctx, gpe_context,
6709 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
/* The same future reference is bound a second time at the next BTI slot. */
6711 surface_id = stat_param->future_references[0].picture_id;
6712 assert(surface_id != VA_INVALID_ID);
6713 obj_surface = SURFACE(surface_id);
6716 i965_add_adv_gpe_surface(ctx, gpe_context,
6718 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
/*
 * Run the FEI PreEnc "preproc" media kernel for the current frame.
 *
 * Sequence visible here: init the GPE context and binding table, fill the
 * CURBE and surface state via the per-gen function pointers, upload the
 * interface descriptor data, optionally seed the FTQ LUT buffer (only when
 * per-MB QP is in use), then launch the kernel with a media-object walker
 * that has no inter-MB dependency (one thread per MB, full-frame resolution).
 *
 * Returns VA_STATUS_SUCCESS (no visible failure path in this extract).
 */
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727 struct encode_state *encode_state,
6728 struct intel_encoder_context *encoder_context)
6730 struct i965_driver_data *i965 = i965_driver_data(ctx);
6731 struct i965_gpe_table *gpe = &i965->gpe_table;
6732 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738 struct i965_gpe_context *gpe_context;
6739 struct gpe_media_object_walker_parameter media_object_walker_param;
6740 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741 int media_function = INTEL_MEDIA_STATE_PREPROC;
6742 struct i965_gpe_resource *gpe_resource = NULL;
6743 unsigned int * data = NULL;
/* 16 DWs per QP x 52 QP values (AVC QP range) for the FTQ lookup table */
6744 unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
/* Select the preproc GPE context and reset its state for this frame. */
6746 gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747 gpe->context_init(ctx, gpe_context);
6748 gpe->reset_binding_table(ctx, gpe_context);
/* Per-gen hooks fill the kernel CURBE and bind all input/output surfaces. */
6751 generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6754 generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6756 gpe->setup_interface_data(ctx, gpe_context);
6758 /* Set up FtqLut Buffer if there is QP change within a frame */
6759 if (stat_param_h264->mb_qp) {
6760 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761 assert(gpe_resource);
/* NOTE(review): resource is mapped and filled here; the matching unmap is
 * not visible in this extract — confirm it exists in the full source. */
6762 data = i965_map_gpe_resource(gpe_resource);
6764 memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
/* One walker thread per macroblock over the whole frame; no_dependency=1
 * means no scoreboard ordering between MBs. */
6767 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770 kernel_walker_param.no_dependency = 1;
6772 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6774 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6777 &media_object_walker_param);
6779 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the Gen8 (Broadwell) AVC MBEnc kernel.
 *
 * The CURBE is seeded from a per-frame-type init table, then patched
 * field-by-field from the encoder state: ME method/search path, per-frame
 * QP, FTQ/skip thresholds, multi-prediction controls, B-frame distance
 * scale factors, rolling intra refresh, ROI rectangles, and finally the
 * binding-table surface indices the kernel uses (dw65..dw86).
 *
 * When mbenc_i_frame_dist_in_use is set, the kernel is being reused for
 * the 4x-downscaled I-frame-distortion pass, so dimensions and QP fields
 * are programmed for the downscaled surface instead of the full frame.
 */
6784 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
6785 struct encode_state *encode_state,
6786 struct i965_gpe_context *gpe_context,
6787 struct intel_encoder_context *encoder_context,
6790 struct i965_driver_data *i965 = i965_driver_data(ctx);
6791 gen8_avc_mbenc_curbe_data *cmd;
6792 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6793 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6794 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6796 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6797 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6798 VASurfaceID surface_id;
6799 struct object_surface *obj_surface;
6801 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
6802 unsigned char qp = 0;
6803 unsigned char me_method = 0;
6804 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
6805 unsigned int table_idx = 0;
6806 unsigned int curbe_size = 0;
6808 unsigned int preset = generic_state->preset;
/* Map the CURBE; only the Gen8 layout is handled by this function. */
6809 if (IS_GEN8(i965->intel.device_info)) {
6810 cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6813 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6814 memset(cmd, 0, curbe_size);
/* Seed from the appropriate per-frame-type init table. */
6816 if (mbenc_i_frame_dist_in_use) {
6817 memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6819 switch (generic_state->frame_type) {
6821 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6824 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6827 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
/* ME method comes from the preset tables; QP from pic+slice params. */
6839 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
6840 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6842 cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6843 cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6844 cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6845 cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6847 cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6848 cmd->dw38.max_len_sp = 0;
/* Frame (progressive) access only; field encoding is not programmed here. */
6850 cmd->dw3.src_access = 0;
6851 cmd->dw3.ref_access = 0;
/* FTQ (forward transform quantization skip) enable resolution. */
6853 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6854 //disable ftq_override by now.
6855 if (avc_state->ftq_override) {
6856 cmd->dw3.ftq_enable = avc_state->ftq_enable;
6859 if (generic_state->frame_type == SLICE_TYPE_P) {
6860 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6863 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6867 cmd->dw3.ftq_enable = 0;
6870 if (avc_state->disable_sub_mb_partion)
6871 cmd->dw3.sub_mb_part_mask = 0x7;
/* Frame geometry: downscaled 4x for the I-dist pass, full-res otherwise. */
6873 if (mbenc_i_frame_dist_in_use) {
6874 cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6875 cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6876 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6877 cmd->dw6.batch_buffer_end = 0;
6878 cmd->dw31.intra_compute_type = 1;
6880 cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6881 cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6882 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
/* Mode/MV cost table: 8 DWs copied into dw8.. indexed by kernel slice type and QP. */
6885 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
6886 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6887 } else if (avc_state->skip_bias_adjustment_enable) {
6888 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6889 // No need to check for P picture as the flag is only enabled for P picture */
6890 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* Search path table: 16 DWs into dw16.., B frames use table index 1. */
6893 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6894 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6896 cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6897 cmd->dw4.field_parity_flag = 0;//bottom field
6898 cmd->dw4.enable_cur_fld_idr = 0;//field realted
6899 cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6900 cmd->dw4.hme_enable = generic_state->hme_enabled;
6901 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6902 cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
6904 cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6905 cmd->dw7.src_field_polarity = 0;//field related
6907 /*ftq_skip_threshold_lut set,dw14 /15*/
6909 /*r5 disable NonFTQSkipThresholdLUT*/
6910 if (generic_state->frame_type == SLICE_TYPE_P) {
6911 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6912 } else if (generic_state->frame_type == SLICE_TYPE_B) {
6913 cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
/* BRC disabled in this path: target size hard-coded to max. */
6916 cmd->dw13.qp_prime_y = qp;
6917 cmd->dw13.qp_prime_cb = qp;
6918 cmd->dw13.qp_prime_cr = qp;
6919 cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
/* Multi-prediction (multiple L0/L1 refs) control; 128 disables, 1 enables. */
6921 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6922 switch (gen9_avc_multi_pred[preset]) {
6924 cmd->dw32.mult_pred_l0_disable = 128;
6925 cmd->dw32.mult_pred_l1_disable = 128;
6928 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6929 cmd->dw32.mult_pred_l1_disable = 128;
6932 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6933 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6936 cmd->dw32.mult_pred_l0_disable = 1;
6937 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6942 cmd->dw32.mult_pred_l0_disable = 128;
6943 cmd->dw32.mult_pred_l1_disable = 128;
6946 if (generic_state->frame_type == SLICE_TYPE_B) {
6947 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
/* NOTE(review): same field assigned twice; the second line was possibly
 * intended for list1_ref_id1_frm_field_parity — confirm against the
 * kernel's CURBE layout before changing. */
6948 cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6949 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6952 cmd->dw34.b_original_bff = 0; //frame only
6953 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6954 cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6955 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6956 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6957 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6958 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6960 if (cmd->dw34.force_non_skip_check) {
6961 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6964 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6965 cmd->dw38.ref_threshold = 400;
6966 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6967 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
/* I-dist pass: zero the QPs and intra non-DC penalties. */
6969 if (mbenc_i_frame_dist_in_use) {
6970 cmd->dw13.qp_prime_y = 0;
6971 cmd->dw13.qp_prime_cb = 0;
6972 cmd->dw13.qp_prime_cr = 0;
6973 cmd->dw33.intra_16x16_nondc_penalty = 0;
6974 cmd->dw33.intra_8x8_nondc_penalty = 0;
6975 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Per-reference actual QP values (dw44..dw46) when MR QP check is active. */
6977 if (cmd->dw4.use_actual_ref_qp_value) {
6978 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6979 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6980 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6981 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6982 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6983 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6984 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6985 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6986 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6987 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6990 table_idx = slice_type_kernel[generic_state->frame_type];
6991 cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
/* Frame-type specific programming: I / P / (else) B. */
6992 if (generic_state->frame_type == SLICE_TYPE_I) {
6993 cmd->dw0.skip_mode_enable = 0;
6994 cmd->dw37.skip_mode_enable = 0;
6995 cmd->dw36.hme_combine_overlap = 0;
6996 cmd->dw47.intra_cost_sf = 16;
6997 cmd->dw34.enable_direct_bias_adjustment = 0;
6998 cmd->dw34.enable_global_motion_bias_adjustment = 0;
7000 } else if (generic_state->frame_type == SLICE_TYPE_P) {
7001 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7002 cmd->dw3.bme_disable_fbr = 1;
7003 cmd->dw5.ref_width = gen9_avc_search_x[preset];
7004 cmd->dw5.ref_height = gen9_avc_search_y[preset];
7005 cmd->dw7.non_skip_zmv_added = 1;
7006 cmd->dw7.non_skip_mode_added = 1;
7007 cmd->dw7.skip_center_mask = 1;
7008 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7009 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
7010 cmd->dw36.hme_combine_overlap = 1;
7011 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7012 cmd->dw39.ref_width = gen9_avc_search_x[preset];
7013 cmd->dw39.ref_height = gen9_avc_search_y[preset];
7014 cmd->dw34.enable_direct_bias_adjustment = 0;
7015 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7016 if (avc_state->global_motion_bias_adjustment_enable)
7017 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* B frame: bidirectional weights, wider skip mask, L1 ref checks. */
7019 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7020 cmd->dw1.bi_weight = avc_state->bi_weight;
7021 cmd->dw3.search_ctrl = 7;
7022 cmd->dw3.skip_type = 1;
7023 cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
7024 cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
7025 cmd->dw7.skip_center_mask = 0xff;
7026 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7027 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7028 cmd->dw36.hme_combine_overlap = 1;
7029 surface_id = slice_param->RefPicList1[0].picture_id;
7030 obj_surface = SURFACE(surface_id);
7032 WARN_ONCE("Invalid backward reference frame\n");
7035 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
7036 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7037 cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
7038 cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
7039 cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
/* Temporal direct-mode distance scale factors for up to 8 L0 refs. */
7040 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
7041 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
7042 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
7043 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
7044 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
7045 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
7046 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
7047 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
7048 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
7049 if (cmd->dw34.enable_direct_bias_adjustment) {
7050 cmd->dw7.non_skip_zmv_added = 1;
7051 cmd->dw7.non_skip_mode_added = 1;
7054 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7055 if (avc_state->global_motion_bias_adjustment_enable)
7056 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* Record the value taken from the init table so PAK-side code agrees. */
7058 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
/* Rolling intra refresh: forced off while BRC is enabled. */
7060 if (avc_state->rolling_intra_refresh_enable) {
7061 /*by now disable it*/
7062 if (generic_state->brc_enabled) {
7063 cmd->dw4.enable_intra_refresh = false;
7064 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7065 cmd->dw48.widi_intra_refresh_mbx = 0;
7066 cmd->dw58.widi_intra_refresh_mby = 0;
7068 cmd->dw4.enable_intra_refresh = true;
7069 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7071 cmd->dw32.mult_pred_l0_disable = 128;
7072 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
7073 across one P frame to another P frame, as needed by the RollingI algo */
7074 cmd->dw48.widi_intra_refresh_mbx = 0;
7075 cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
7076 cmd->dw48.widi_intra_refresh_qp_delta = 0;
7079 cmd->dw34.widi_intra_refresh_en = 0;
/* Up to four ROI rectangles (dw49..dw56); QP deltas only without BRC. */
7082 /*roi set disable by now. 49-56*/
7083 if (curbe_param->roi_enabled) {
7084 cmd->dw49.roi_1_x_left = generic_state->roi[0].left;
7085 cmd->dw49.roi_1_y_top = generic_state->roi[0].top;
7086 cmd->dw50.roi_1_x_right = generic_state->roi[0].right;
7087 cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
7089 cmd->dw51.roi_2_x_left = generic_state->roi[1].left;
7090 cmd->dw51.roi_2_y_top = generic_state->roi[1].top;
7091 cmd->dw52.roi_2_x_right = generic_state->roi[1].right;
7092 cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
7094 cmd->dw53.roi_3_x_left = generic_state->roi[2].left;
7095 cmd->dw53.roi_3_y_top = generic_state->roi[2].top;
7096 cmd->dw54.roi_3_x_right = generic_state->roi[2].right;
7097 cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
7099 cmd->dw55.roi_4_x_left = generic_state->roi[3].left;
7100 cmd->dw55.roi_4_y_top = generic_state->roi[3].top;
7101 cmd->dw56.roi_4_x_right = generic_state->roi[3].right;
7102 cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
7104 cmd->dw36.enable_cabac_work_around = 0;
7106 if (!generic_state->brc_enabled) {
/* Clamp each ROI delta-QP so qp + delta stays within [0, AVC_QP_MAX]. */
7108 tmp = generic_state->roi[0].value;
7109 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7110 cmd->dw57.roi_1_dqp_prime_y = tmp;
7111 tmp = generic_state->roi[1].value;
7112 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7113 cmd->dw57.roi_2_dqp_prime_y = tmp;
7114 tmp = generic_state->roi[2].value;
7115 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7116 cmd->dw57.roi_3_dqp_prime_y = tmp;
7117 tmp = generic_state->roi[3].value;
7118 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7119 cmd->dw57.roi_4_dqp_prime_y = tmp;
7121 cmd->dw34.roi_enable_flag = 0;
/* Binding-table surface indices consumed by the Gen8 MBEnc kernel. */
7125 cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
7126 cmd->dw66.mv_data_surf_index = GEN8_AVC_MBENC_IND_MV_DATA_CM;
7127 cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
7128 cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
7129 cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
7130 cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
7131 cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
7132 cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
7133 cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
7134 cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
7135 cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
7136 cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
7137 cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
7138 cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
7139 cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
7140 cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
7141 cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
7142 cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
7143 cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
7144 cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
7145 cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
7146 cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
7148 i965_gpe_context_unmap_curbe(gpe_context);
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155 struct encode_state *encode_state,
7156 struct i965_gpe_context *gpe_context,
7157 struct intel_encoder_context *encoder_context,
7160 gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161 struct scaling_param *surface_param = (struct scaling_param *)param;
7163 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7168 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7170 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
7171 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7173 curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174 curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7176 curbe_cmd->dw5.flatness_threshold = 0;
7177 if (surface_param->enable_mb_flatness_check) {
7178 curbe_cmd->dw5.flatness_threshold = 128;
7179 curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7182 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7186 if (curbe_cmd->dw6.enable_mb_variance_output ||
7187 curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188 curbe_cmd->dw10.mbv_proc_states_bti_top_field = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189 curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7192 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the CURBE for the Gen8 HME (hierarchical motion estimation) kernel.
 *
 * The same kernel runs at 4x, 16x and 32x downscale; curbe_param->hme_type
 * selects the per-level controls: whether MVs from the coarser level feed
 * this one (use_mv_from_prev_step), whether distortion surfaces are written
 * (4x only), and the MV shift / read-position factors for that level.
 */
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198 struct encode_state *encode_state,
7199 struct i965_gpe_context *gpe_context,
7200 struct intel_encoder_context *encoder_context,
7203 gen8_avc_me_curbe_data *curbe_cmd;
7204 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7208 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7210 struct me_param * curbe_param = (struct me_param *)param ;
7211 unsigned char use_mv_from_prev_step = 0;
7212 unsigned char write_distortions = 0;
7213 unsigned char qp_prime_y = 0;
7214 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215 unsigned char seach_table_idx = 0;
7216 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218 unsigned int scale_factor = 0;
7220 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Per-HME-level control selection; scale_factor is set in lines not
 * visible in this extract (presumably 4/16/32 per case). */
7221 switch (curbe_param->hme_type) {
7222 case INTEL_ENC_HME_4x : {
7223 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224 write_distortions = 1;
7225 mv_shift_factor = 2;
7227 prev_mv_read_pos_factor = 0;
7230 case INTEL_ENC_HME_16x : {
7231 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232 write_distortions = 0;
7233 mv_shift_factor = 2;
7235 prev_mv_read_pos_factor = 1;
7238 case INTEL_ENC_HME_32x : {
7239 use_mv_from_prev_step = 0;
7240 write_distortions = 0;
7241 mv_shift_factor = 1;
7243 prev_mv_read_pos_factor = 0;
7250 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* MB dimensions of the downscaled surface for this HME level. */
7255 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7258 memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7260 curbe_cmd->dw3.sub_pel_mode = 3;
7261 if (avc_state->field_scaling_output_interleaved) {
7262 /*frame set to zero,field specified*/
7263 curbe_cmd->dw3.src_access = 0;
7264 curbe_cmd->dw3.ref_access = 0;
7265 curbe_cmd->dw7.src_field_polarity = 0;
7267 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7271 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272 curbe_cmd->dw6.write_distortions = write_distortions;
7273 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames use bidirectional weighting and the B-specific search table. */
7276 if (generic_state->frame_type == SLICE_TYPE_B) {
7277 curbe_cmd->dw1.bi_weight = 32;
7278 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279 me_method = gen9_avc_b_me_method[generic_state->preset];
7280 seach_table_idx = 1;
7283 if (generic_state->frame_type == SLICE_TYPE_P ||
7284 generic_state->frame_type == SLICE_TYPE_B)
7285 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7287 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Search path: 14 DWs for the Gen8 ME CURBE (dw16..dw29). */
7290 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table surface indices; the 16x/32x input alternates by level. */
7292 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298 curbe_cmd->dw38.reserved = 0;
7300 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the CURBE for the Gen8 BRC frame-update kernel.
 *
 * Updates the bit-rate-control state for the current frame: target buffer
 * fullness, skip-frame accounting, frame type and flags, min/max QP per
 * frame type, and (for AVBR) the growth-adjustment frame thresholds and
 * rate-ratio thresholds derived from avbr_convergence / avbr_curracy.
 *
 * NOTE(review): several state field names carry upstream typos
 * (skip_frame_enbale, herder_bytes_inserted, avbr_curracy); they are real
 * identifiers and must match their declarations — do not "fix" them here.
 */
7305 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7306 struct encode_state *encode_state,
7307 struct i965_gpe_context *gpe_context,
7308 struct intel_encoder_context *encoder_context,
7311 gen8_avc_frame_brc_update_curbe_data *cmd;
7312 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7313 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7314 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7315 struct object_surface *obj_surface;
7316 struct gen9_surface_avc *avc_priv_surface;
7317 struct avc_param common_param;
7319 obj_surface = encode_state->reconstructed_object;
7321 if (!obj_surface || !obj_surface->private_data)
7323 avc_priv_surface = obj_surface->private_data;
7325 cmd = i965_gpe_context_map_curbe(gpe_context);
7330 memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
/* If the modelled buffer fullness exceeds the buffer size, wrap it and
 * tell the kernel via target_size_flag. */
7332 cmd->dw5.target_size_flag = 0 ;
7333 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7335 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7336 cmd->dw5.target_size_flag = 1 ;
/* Account skipped frames' bits toward the buffer model. */
7339 if (generic_state->skip_frame_enbale) {
7340 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7341 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
7343 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7346 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7347 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes are reported to the kernel in bits (<< 3). */
7348 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7349 cmd->dw5.cur_frame_type = generic_state->frame_type ;
7350 cmd->dw5.brc_flag = 0 ;
7351 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7353 if (avc_state->multi_pre_enable) {
7354 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7355 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7358 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Min/max QP clamps, per frame type (0/0 disables clamping). */
7359 if (avc_state->min_max_qp_enable) {
7360 switch (generic_state->frame_type) {
7362 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7363 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7366 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7367 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7370 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7371 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7375 cmd->dw6.minimum_qp = 0 ;
7376 cmd->dw6.maximum_qp = 0 ;
/* Advance the buffer model by one frame's worth of input bits. */
7379 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR only: growth-adjust frame thresholds scaled by convergence, and
 * rate-ratio thresholds widened/narrowed by the accuracy setting. */
7381 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7382 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
7383 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
7384 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
7385 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
7386 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7387 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7388 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7389 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7390 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7391 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
/* NOTE(review): common_param is filled here but no consumer is visible in
 * this extract — presumably passed to a helper on lines not shown. */
7395 memset(&common_param, 0, sizeof(common_param));
7396 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7397 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7398 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7399 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7400 common_param.frames_per_100s = generic_state->frames_per_100s;
7401 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7402 common_param.target_bit_rate = generic_state->target_bit_rate;
7404 i965_gpe_context_unmap_curbe(gpe_context);
7410 Kernel-related functions: init/destroy, etc.
/*
 * Set up the GPE contexts and load the media kernels for the scaling
 * (down-sampling) passes of the AVC encoder: the 4x scaler always, and
 * (per the visible control flow) a 2x scaler as well.
 * NOTE(review): this chunk is an incomplete extraction -- the return type,
 * several braces/else-branches and the tails of the load_kernels() calls
 * are missing, so comments describe only what the visible lines establish.
 */
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414 struct generic_encoder_context *generic_context,
7415 struct gen_avc_scaling_context *kernel_context,
7418 struct i965_driver_data *i965 = i965_driver_data(ctx);
7419 struct i965_gpe_table *gpe = &i965->gpe_table;
7420 struct i965_gpe_context *gpe_context = NULL;
7421 struct encoder_kernel_parameter kernel_param ;
7422 struct encoder_scoreboard_parameter scoreboard_param;
7423 struct i965_kernel common_kernel;
7425 memset(&kernel_param, 0, sizeof(kernel_param));
/* Pick the curbe/inline-data layout that matches the hardware generation. */
7426 if (IS_SKL(i965->intel.device_info) ||
7427 IS_BXT(i965->intel.device_info)) {
7428 if (!preenc_enabled) {
7429 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7432 /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7436 } else if (IS_KBL(i965->intel.device_info) ||
7437 IS_GEN10(i965->intel.device_info) ||
7438 IS_GLK(i965->intel.device_info)) {
7439 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441 } else if (IS_GEN8(i965->intel.device_info)) {
7442 kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443 kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7447 /* 4x scaling kernel*/
7448 kernel_param.sampler_size = 0;
/* Shared hardware-scoreboard setup reused by both scaling contexts below. */
7450 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451 scoreboard_param.mask = 0xFF;
7452 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454 scoreboard_param.walkpat_flag = 0;
7456 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7460 memset(&common_kernel, 0, sizeof(common_kernel));
/* Locate the 4x-scaling kernel binary inside the encoder kernel blob. */
7462 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463 generic_context->enc_kernel_size,
7464 INTEL_GENERIC_ENC_SCALING4X,
7468 gpe->load_kernels(ctx,
7473 /* PreEnc using only the 4X scaling */
7477 /*2x scaling kernel*/
7478 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479 kernel_param.inline_data_size = 0;
7480 kernel_param.sampler_size = 0;
7482 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7486 memset(&common_kernel, 0, sizeof(common_kernel));
/* 2x scaler uses the direct helper rather than the context callback above. */
7488 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489 generic_context->enc_kernel_size,
7490 INTEL_GENERIC_ENC_SCALING2X,
7494 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts and load the HME (hierarchical motion estimation)
 * kernels. Two contexts are created: one for P frames and one for B frames.
 * NOTE(review): incomplete extraction -- return type, braces and the tails
 * of the load_kernels() call are not visible here.
 */
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503 struct generic_encoder_context *generic_context,
7504 struct gen_avc_me_context *kernel_context,
7507 struct i965_driver_data *i965 = i965_driver_data(ctx);
7508 struct i965_gpe_table *gpe = &i965->gpe_table;
7509 struct i965_gpe_context *gpe_context = NULL;
7510 struct encoder_kernel_parameter kernel_param ;
7511 struct encoder_scoreboard_parameter scoreboard_param;
7512 struct i965_kernel common_kernel;
7514 unsigned int curbe_size = 0;
/* Curbe layout differs by hardware generation and FEI/PreEnc mode. */
7516 if (IS_GEN8(i965->intel.device_info)) {
7517 curbe_size = sizeof(gen8_avc_me_curbe_data);
7519 if (!preenc_enabled)
7520 curbe_size = sizeof(gen9_avc_me_curbe_data);
7522 curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7525 kernel_param.curbe_size = curbe_size;
7526 kernel_param.inline_data_size = 0;
7527 kernel_param.sampler_size = 0;
7529 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530 scoreboard_param.mask = 0xFF;
7531 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533 scoreboard_param.walkpat_flag = 0;
7535 /* There is two hme kernel, one for P and other for B frame */
7536 for (i = 0; i < 2; i++) {
7537 gpe_context = &kernel_context->gpe_contexts[i];
7538 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7541 memset(&common_kernel, 0, sizeof(common_kernel));
/* Locate the ME kernel binary inside the encoder kernel blob. */
7543 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544 generic_context->enc_kernel_size,
7545 INTEL_GENERIC_ENC_ME,
7549 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the FEI PreProc kernel.
 * NOTE(review): incomplete extraction -- return type, braces and the tails
 * of the get_kernel_header_and_size()/load_kernels() calls are not visible.
 */
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559 struct generic_encoder_context *generic_context,
7560 struct gen_avc_preproc_context *kernel_context)
7562 struct i965_driver_data *i965 = i965_driver_data(ctx);
7563 struct i965_gpe_table *gpe = &i965->gpe_table;
7564 struct i965_gpe_context *gpe_context = NULL;
7565 struct encoder_kernel_parameter kernel_param ;
7566 struct encoder_scoreboard_parameter scoreboard_param;
7567 struct i965_kernel common_kernel;
7569 kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570 kernel_param.inline_data_size = 0;
7571 kernel_param.sampler_size = 0;
7573 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574 scoreboard_param.mask = 0xFF;
7575 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577 scoreboard_param.walkpat_flag = 0;
/* PreProc has a single context (no per-frame-type array indexing). */
7579 gpe_context = &kernel_context->gpe_contexts;
7580 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7583 memset(&common_kernel, 0, sizeof(common_kernel));
/* FEI-specific lookup helper, unlike the generic callback used elsewhere. */
7585 intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586 generic_context->enc_kernel_size,
7587 INTEL_GENERIC_ENC_PREPROC,
7591 gpe->load_kernels(ctx,
/*
 * Set up one GPE context per MbEnc kernel variant and load the MbEnc
 * (macroblock encoding) kernels.
 * NOTE(review): incomplete extraction -- return type, braces, else-branches
 * and call-argument tails are not visible in this chunk.
 */
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600 struct generic_encoder_context *generic_context,
7601 struct gen_avc_mbenc_context *kernel_context,
7604 struct i965_driver_data *i965 = i965_driver_data(ctx);
7605 struct i965_gpe_table *gpe = &i965->gpe_table;
7606 struct i965_gpe_context *gpe_context = NULL;
7607 struct encoder_kernel_parameter kernel_param ;
7608 struct encoder_scoreboard_parameter scoreboard_param;
7609 struct i965_kernel common_kernel;
7611 unsigned int curbe_size = 0;
7612 unsigned int num_mbenc_kernels = 0;
/* Select curbe layout and kernel count per hardware generation / FEI mode. */
7614 if (IS_SKL(i965->intel.device_info) ||
7615 IS_BXT(i965->intel.device_info)) {
7617 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7620 curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621 num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7623 } else if (IS_KBL(i965->intel.device_info) ||
7624 IS_GEN10(i965->intel.device_info) ||
7625 IS_GLK(i965->intel.device_info)) {
7626 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628 } else if (IS_GEN8(i965->intel.device_info)) {
7629 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
/* Unsupported platforms would leave curbe_size at 0; catch that here. */
7633 assert(curbe_size > 0);
7634 kernel_param.curbe_size = curbe_size;
7635 kernel_param.inline_data_size = 0;
7636 kernel_param.sampler_size = 0;
7638 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639 scoreboard_param.mask = 0xFF;
7640 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642 scoreboard_param.walkpat_flag = 0;
7644 for (i = 0; i < num_mbenc_kernels ; i++) {
7645 gpe_context = &kernel_context->gpe_contexts[i];
7646 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7649 memset(&common_kernel, 0, sizeof(common_kernel));
7651 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652 generic_context->enc_kernel_size,
7653 INTEL_GENERIC_ENC_MBENC,
7657 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts and load the BRC (bit-rate control) kernels.
 * Gen8 provides one fewer BRC kernel than Gen9+, and each kernel index has
 * its own curbe size, taken from a per-generation lookup table.
 * NOTE(review): incomplete extraction -- return type, braces and the tails
 * of the load_kernels() call are not visible here.
 */
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667 struct generic_encoder_context *generic_context,
7668 struct gen_avc_brc_context *kernel_context)
7670 struct i965_driver_data *i965 = i965_driver_data(ctx);
7671 struct i965_gpe_table *gpe = &i965->gpe_table;
7672 struct i965_gpe_context *gpe_context = NULL;
7673 struct encoder_kernel_parameter kernel_param ;
7674 struct encoder_scoreboard_parameter scoreboard_param;
7675 struct i965_kernel common_kernel;
7676 int num_brc_init_kernels = 0;
7679 if (IS_GEN8(i965->intel.device_info)) {
7680 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7682 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
/* Per-kernel curbe sizes, indexed by BRC kernel slot (Gen8 table). */
7685 const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687 (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689 (sizeof(gen8_avc_mbenc_curbe_data)),
/* Per-kernel curbe sizes for Gen9+; the MbEnc entry depends on platform. */
7692 const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696 ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7698 (sizeof(gen9_avc_mb_brc_curbe_data))
7701 kernel_param.inline_data_size = 0;
7702 kernel_param.sampler_size = 0;
7704 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705 scoreboard_param.mask = 0xFF;
7706 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708 scoreboard_param.walkpat_flag = 0;
7710 for (i = 0; i < num_brc_init_kernels; i++) {
7711 if (IS_GEN8(i965->intel.device_info)) {
7712 kernel_param.curbe_size = gen8_brc_curbe_size[i];
7714 kernel_param.curbe_size = gen9_brc_curbe_size[i];
7716 gpe_context = &kernel_context->gpe_contexts[i];
7717 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7720 memset(&common_kernel, 0, sizeof(common_kernel));
7722 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723 generic_context->enc_kernel_size,
7724 INTEL_GENERIC_ENC_BRC,
7728 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the WP (weighted prediction)
 * kernel.
 * NOTE(review): incomplete extraction -- return type, braces and the tails
 * of the get_kernel_header_and_size()/load_kernels() calls are not visible.
 */
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738 struct generic_encoder_context *generic_context,
7739 struct gen_avc_wp_context *kernel_context)
7741 struct i965_driver_data *i965 = i965_driver_data(ctx);
7742 struct i965_gpe_table *gpe = &i965->gpe_table;
7743 struct i965_gpe_context *gpe_context = NULL;
7744 struct encoder_kernel_parameter kernel_param ;
7745 struct encoder_scoreboard_parameter scoreboard_param;
7746 struct i965_kernel common_kernel;
7748 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749 kernel_param.inline_data_size = 0;
7750 kernel_param.sampler_size = 0;
7752 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753 scoreboard_param.mask = 0xFF;
7754 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756 scoreboard_param.walkpat_flag = 0;
/* WP has a single context (no per-kernel array). */
7758 gpe_context = &kernel_context->gpe_contexts;
7759 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7762 memset(&common_kernel, 0, sizeof(common_kernel));
7764 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765 generic_context->enc_kernel_size,
7766 INTEL_GENERIC_ENC_WP,
7770 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the SFD (static frame detection)
 * kernel.
 * NOTE(review): incomplete extraction -- return type, braces and the tails
 * of the get_kernel_header_and_size()/load_kernels() calls are not visible.
 */
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779 struct generic_encoder_context *generic_context,
7780 struct gen_avc_sfd_context *kernel_context)
7782 struct i965_driver_data *i965 = i965_driver_data(ctx);
7783 struct i965_gpe_table *gpe = &i965->gpe_table;
7784 struct i965_gpe_context *gpe_context = NULL;
7785 struct encoder_kernel_parameter kernel_param ;
7786 struct encoder_scoreboard_parameter scoreboard_param;
7787 struct i965_kernel common_kernel;
7789 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790 kernel_param.inline_data_size = 0;
7791 kernel_param.sampler_size = 0;
7793 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794 scoreboard_param.mask = 0xFF;
7795 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797 scoreboard_param.walkpat_flag = 0;
/* SFD has a single context (no per-kernel array). */
7799 gpe_context = &kernel_context->gpe_contexts;
7800 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7803 memset(&common_kernel, 0, sizeof(common_kernel));
7805 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806 generic_context->enc_kernel_size,
7807 INTEL_GENERIC_ENC_SFD,
7811 gpe->load_kernels(ctx,
/*
 * Tear down everything created by the gen9_avc_kernel_init_* functions:
 * free the encoder GPE resources, then destroy each kernel's GPE
 * context(s) -- scaling, BRC, ME and MbEnc are arrays; WP, SFD and
 * PreProc are single contexts.
 * NOTE(review): incomplete extraction -- return type/braces not visible.
 */
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7822 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824 struct i965_gpe_table *gpe = &i965->gpe_table;
7828 gen9_avc_free_resources(vme_context);
7830 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7833 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7836 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7839 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7842 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7844 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7846 gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
/*
 * Refresh the per-frame encoder state from the application-supplied
 * sequence/picture/slice parameters: cache the parameter buffers, derive
 * the frame type from the first slice, pull BRC rate-control settings,
 * compute frame/downscaled dimensions, and enable/disable the HME,
 * 16xME and 32xME stages based on support flags and minimum sizes.
 * NOTE(review): incomplete extraction -- return type, braces and several
 * connective lines are missing; comments describe only visible lines.
 */
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7856 struct encode_state *encode_state,
7857 struct intel_encoder_context *encoder_context)
7859 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862 VAEncSequenceParameterBufferH264 *seq_param;
7863 VAEncSliceParameterBufferH264 *slice_param;
7864 VAEncMiscParameterBuffer *fei_misc_param;
7865 int i, j, slice_index;
7866 unsigned int preset = generic_state->preset;
7867 unsigned int fei_enabled = encoder_context->fei_enabled;
7869 /* seq/pic/slice parameter setting */
7870 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7873 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* FEI mode: cache the per-frame FEI control parameters when supplied. */
7877 encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878 fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879 avc_state->fei_framectl_param =
7880 (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
/* Collect every slice parameter element across all slice param buffers. */
7883 avc_state->slice_num = 0;
7885 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888 avc_state->slice_param[slice_index] = slice_param;
7891 avc_state->slice_num++;
7895 /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896 seq_param = avc_state->seq_param;
7897 slice_param = avc_state->slice_param[0];
/* The first slice's type decides the whole frame's type (I/P/B). */
7899 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7901 if (slice_param->slice_type == SLICE_TYPE_I ||
7902 slice_param->slice_type == SLICE_TYPE_SI)
7903 generic_state->frame_type = SLICE_TYPE_I;
7904 else if (slice_param->slice_type == SLICE_TYPE_P)
7905 generic_state->frame_type = SLICE_TYPE_P;
7906 else if (slice_param->slice_type == SLICE_TYPE_B)
7907 generic_state->frame_type = SLICE_TYPE_B;
7908 if (profile == VAProfileH264High)
7909 avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7911 avc_state->transform_8x8_mode_enable = 0;
/* On (re)initialization of BRC, derive rate-control targets from the SPS. */
7914 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918 generic_state->frames_per_100s = 3000; /* 30fps */
7921 generic_state->gop_size = seq_param->intra_period;
7922 generic_state->gop_ref_distance = seq_param->ip_period;
7924 if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925 generic_state->max_bit_rate = generic_state->target_bit_rate;
7926 generic_state->min_bit_rate = generic_state->target_bit_rate;
7929 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
/* Map the requested quality level to an internal preset / kernel mode. */
7933 generic_state->preset = encoder_context->quality_level;
7934 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935 generic_state->preset = INTEL_PRESET_RT_SPEED;
7937 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7939 if (!generic_state->brc_inited) {
7940 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7947 generic_state->curr_pak_pass = 0;
7948 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Only CBR/VBR engage BRC; other modes fall back to single-pass PAK. */
7950 if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951 generic_state->internal_rate_mode == VA_RC_VBR)
7952 generic_state->brc_enabled = 1;
7954 generic_state->brc_enabled = 0;
/* BRC needs a complete set of rate parameters; otherwise disable it. */
7956 if (generic_state->brc_enabled &&
7957 (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958 !generic_state->vbv_buffer_size_in_bit ||
7959 !generic_state->max_bit_rate ||
7960 !generic_state->target_bit_rate ||
7961 !generic_state->frames_per_100s)) {
7962 WARN_ONCE("Rate control parameter is required for BRC\n");
7963 generic_state->brc_enabled = 0;
7966 if (!generic_state->brc_enabled) {
7967 generic_state->target_bit_rate = 0;
7968 generic_state->max_bit_rate = 0;
7969 generic_state->min_bit_rate = 0;
7970 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971 generic_state->vbv_buffer_size_in_bit = 0;
7972 generic_state->num_pak_passes = 1;
7974 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Frame geometry in MBs/pixels, plus 4x/16x/32x downscaled dimensions. */
7978 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7983 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
7986 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7988 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
7991 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7993 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
7996 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7998 if (generic_state->hme_supported) {
7999 generic_state->hme_enabled = 1;
8001 generic_state->hme_enabled = 0;
8004 if (generic_state->b16xme_supported) {
8005 generic_state->b16xme_enabled = 1;
8007 generic_state->b16xme_enabled = 0;
8010 if (generic_state->b32xme_supported) {
8011 generic_state->b32xme_enabled = 1;
8013 generic_state->b32xme_enabled = 0;
8015 /* disable HME/16xME if the size is too small */
8016 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017 generic_state->b32xme_supported = 0;
8018 generic_state->b32xme_enabled = 0;
8019 generic_state->b16xme_supported = 0;
8020 generic_state->b16xme_enabled = 0;
8021 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8024 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025 generic_state->b32xme_supported = 0;
8026 generic_state->b32xme_enabled = 0;
8027 generic_state->b16xme_supported = 0;
8028 generic_state->b16xme_enabled = 0;
8029 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/* Clamp 16x/32x planes to the minimum VME size as well. */
8033 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034 generic_state->b32xme_supported = 0;
8035 generic_state->b32xme_enabled = 0;
8036 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8039 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040 generic_state->b32xme_supported = 0;
8041 generic_state->b32xme_enabled = 0;
8042 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8046 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8050 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and finalize the per-frame encoding decisions: resolve the
 * internal rate-control mode, BRC/ROI flags, SFD, CAF, flatness check,
 * slice-layout checks, bi-prediction weights, skip-bias adjustment and
 * the inter rounding value used by the kernels.
 * NOTE(review): incomplete extraction -- return type, braces, case labels
 * and several connective lines are missing from this chunk.
 */
8058 gen9_avc_encode_check_parameter(VADriverContextP ctx,
8059 struct encode_state *encode_state,
8060 struct intel_encoder_context *encoder_context)
8062 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8063 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8064 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8065 unsigned int rate_control_mode = encoder_context->rate_control_mode;
8066 unsigned int preset = generic_state->preset;
8067 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
8068 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Effective slice QP = pic_init_qp + the slice's delta. */
8070 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8072 generic_state->avbr_curracy = 30;
8073 generic_state->avbr_convergence = 150;
/* Map the libva rate-control bits onto the internal mode. */
8075 switch (rate_control_mode & 0x7f) {
8077 generic_state->internal_rate_mode = VA_RC_CBR;
8081 generic_state->internal_rate_mode = VA_RC_VBR;
8086 generic_state->internal_rate_mode = VA_RC_CQP;
8090 if (rate_control_mode != VA_RC_NONE &&
8091 rate_control_mode != VA_RC_CQP) {
8092 generic_state->brc_enabled = 1;
8093 generic_state->brc_distortion_buffer_supported = 1;
8094 generic_state->brc_constant_buffer_supported = 1;
8095 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
8098 /*check brc parameter*/
8099 if (generic_state->brc_enabled) {
8100 avc_state->mb_qp_data_enable = 0;
8103 /*set the brc init and reset accordingly*/
8104 if (generic_state->brc_need_reset &&
8105 (generic_state->brc_distortion_buffer_supported == 0 ||
8106 rate_control_mode == VA_RC_CQP)) {
8107 generic_state->brc_need_reset = 0;// not support by CQP
/* SFD only applies outside CBR/VBR and never on I frames. */
8109 if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
8110 avc_state->sfd_enable = 0;
8112 avc_state->sfd_enable = 1;
/* BRC sliding window: default/cap it to min(fps, 60) frames. */
8115 if (generic_state->frames_per_window_size == 0) {
8116 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8117 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
8118 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8121 if (generic_state->brc_enabled) {
8122 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
8123 if (avc_state->min_max_qp_enable) {
8124 generic_state->num_pak_passes = 1;
8126 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
8127 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
8129 generic_state->num_pak_passes = 1;// CQP only one pass
/* I-frame distortion pass only when BRC + distortion buffer available. */
8132 avc_state->mbenc_i_frame_dist_in_use = 0;
8133 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
8135 /*ROI must enable mbbrc.*/
/* CAF (check-all-fractional): per-preset enable, split by frame type. */
8138 if (avc_state->caf_supported) {
8139 switch (generic_state->frame_type) {
8141 avc_state->caf_enable = 0;
8144 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
8147 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
8151 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
8152 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
8153 avc_state->caf_enable = 0;
8157 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
8159 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
8160 if (avc_state->flatness_check_supported) {
8161 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
8163 avc_state->flatness_check_enable = 0;
8166 /* check mb_status_supported/enbale*/
8167 if (avc_state->adaptive_transform_decision_enable) {
8168 avc_state->mb_status_enable = 1;
8170 avc_state->mb_status_enable = 0;
8172 /*slice check,all the slices use the same slice height except the last slice*/
8173 avc_state->arbitrary_num_mbs_in_slice = 0;
8174 for (i = 0; i < avc_state->slice_num; i++) {
8175 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
8176 avc_state->arbitrary_num_mbs_in_slice = 1;
8177 avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
8179 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
8183 if (avc_state->slice_num > 1)
8184 avc_state->arbitrary_num_mbs_in_slice = 1;
/* I frames never use any of the HME hierarchy. */
8186 if (generic_state->frame_type == SLICE_TYPE_I) {
8187 generic_state->hme_enabled = 0;
8188 generic_state->b16xme_enabled = 0;
8189 generic_state->b32xme_enabled = 0;
8192 if (generic_state->frame_type == SLICE_TYPE_B) {
8193 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
8194 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
8197 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
8198 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
8199 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
/* Quality kernel mode turns on trellis quantization and MB-level BRC. */
8201 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
8202 avc_state->tq_enable = 1;
8203 avc_state->tq_rounding = 6;
8204 if (generic_state->brc_enabled) {
8205 generic_state->mb_brc_enabled = 1;
8209 //check the inter rounding
8210 avc_state->rounding_value = 0;
8211 avc_state->rounding_inter_p = 255;//default
8212 avc_state->rounding_inter_b = 255; //default
8213 avc_state->rounding_inter_b_ref = 255; //default
/* Rounding: adaptive tables keyed by slice QP when BRC is off, otherwise
 * per-preset tables; explicit overrides win when not the invalid marker. */
8215 if (generic_state->frame_type == SLICE_TYPE_P) {
8216 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
8217 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
8218 if (generic_state->gop_ref_distance == 1)
8219 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
8221 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
8223 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
8227 avc_state->rounding_value = avc_state->rounding_inter_p;
8229 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8230 if (pic_param->pic_fields.bits.reference_pic_flag) {
8231 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
8232 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
8234 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
8236 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
8237 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
8238 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
8240 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
8242 avc_state->rounding_value = avc_state->rounding_inter_b;
8246 return VA_STATUS_SUCCESS;
/*
 * Bind every per-frame resource the VME GPE kernels consume or produce:
 * the reconstructed surface and its direct-MV (DMV) buffers, the raw input
 * YUV surface, the reference surface list, the coded (bitstream) buffer with
 * its status segment, and the L0/L1 reference-picture index mapping.
 * Returns VA_STATUS_SUCCESS on success.
 */
8250 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
8251 struct encode_state *encode_state,
8252 struct intel_encoder_context *encoder_context)
8255 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8256 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8257 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8258 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8259 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8261 struct object_surface *obj_surface;
8262 struct object_buffer *obj_buffer;
8263 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8264 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8265 struct i965_coded_buffer_segment *coded_buffer_segment;
8267 struct gen9_surface_avc *avc_priv_surface;
8269 struct avc_surface_param surface_param;
8271 unsigned char * pdata;
8273 /* Setup current reconstruct frame */
8274 obj_surface = encode_state->reconstructed_object;
8275 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8277 if (va_status != VA_STATUS_SUCCESS)
8280 memset(&surface_param, 0, sizeof(surface_param));
8281 surface_param.frame_width = generic_state->frame_width_in_pixel;
8282 surface_param.frame_height = generic_state->frame_height_in_pixel;
/* Lazily create the per-surface AVC private data (DMV buffers etc.). */
8283 va_status = gen9_avc_init_check_surfaces(ctx,
8287 if (va_status != VA_STATUS_SUCCESS)
8290 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
8291 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
/* The last two DMV slots (NUM_MFC_AVC_DMV_BUFFERS-2/-1) are rebound to the
 * current frame's top/bottom direct-MV buffers on every frame. */
8292 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
8293 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
8294 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
8295 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
8296 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
8297 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
/* Frame-level QP: picture init QP plus the first slice's delta. */
8298 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8299 avc_priv_surface->frame_store_id = 0;
8300 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
8301 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
8302 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
/* Bottom field POC is tracked as top-field POC + 1. */
8303 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
8304 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
8306 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
8307 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8309 /* input YUV surface*/
8310 obj_surface = encode_state->input_yuv_object;
8311 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8313 if (va_status != VA_STATUS_SUCCESS)
8315 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
8316 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8318 /* Reference surfaces */
/* Rebind every reference slot; empty slots keep POC 0 and freed DMV buffers. */
8319 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
8320 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
8321 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
8322 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
8323 obj_surface = encode_state->reference_objects[i];
8324 avc_state->top_field_poc[2 * i] = 0;
8325 avc_state->top_field_poc[2 * i + 1] = 0;
8327 if (obj_surface && obj_surface->bo) {
8328 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
8330 /* actually it should be handled when it is reconstructed surface*/
8331 va_status = gen9_avc_init_check_surfaces(ctx,
8332 obj_surface, encoder_context,
8334 if (va_status != VA_STATUS_SUCCESS)
8336 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
8337 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
8338 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
8339 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
8340 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
8341 avc_priv_surface->frame_store_id = i;
8347 /* Encoded bitstream ?*/
8348 obj_buffer = encode_state->coded_buf_object;
8349 bo = obj_buffer->buffer_store->bo;
8350 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
8351 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
8352 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
/* NOTE(review): end_offset trims up to 0x1000 bytes from the tail of the
 * coded buffer — presumably reserved space; confirm against the PAK path. */
8353 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* The status buffer shares the coded buffer's BO. */
8356 avc_ctx->status_buffer.bo = bo;
8358 /* set the internal flag to 0 to indicate the coded size is unknown */
/* NOTE(review): bo->virtual is dereferenced here — the matching dri_bo_map
 * is expected immediately above this point; confirm it is present. */
8360 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
8361 coded_buffer_segment->mapped = 0;
8362 coded_buffer_segment->codec = encoder_context->codec;
8363 coded_buffer_segment->status_support = 1;
/* Clear the status-report region that follows the coded-buffer header. */
8365 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
8366 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
8369 //frame id, it is the ref pic id in the reference_objects list.
/* Active reference counts come from the picture params, optionally
 * overridden per-slice (num_ref_idx_active_override_flag, H.264 7.4.3). */
8370 avc_state->num_refs[0] = 0;
8371 avc_state->num_refs[1] = 0;
8372 if (generic_state->frame_type == SLICE_TYPE_P) {
8373 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8375 if (slice_param->num_ref_idx_active_override_flag)
8376 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8377 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8378 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8379 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
8381 if (slice_param->num_ref_idx_active_override_flag) {
8382 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8383 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Reject counts that would overrun the fixed-size index tables. */
8387 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
8388 return VA_STATUS_ERROR_INVALID_VALUE;
8389 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
8390 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 entry to its slot index in reference_objects[]. */
8392 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
8393 VAPictureH264 *va_pic;
8395 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
8396 avc_state->list_ref_idx[0][i] = 0;
8398 if (i >= avc_state->num_refs[0])
8401 va_pic = &slice_param->RefPicList0[i];
8403 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8404 obj_surface = encode_state->reference_objects[j];
8408 obj_surface->base.id == va_pic->picture_id) {
8410 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8411 avc_state->list_ref_idx[0][i] = j;
/* Same mapping for RefPicList1 (B frames). */
8417 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
8418 VAPictureH264 *va_pic;
8420 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
8421 avc_state->list_ref_idx[1][i] = 0;
8423 if (i >= avc_state->num_refs[1])
8426 va_pic = &slice_param->RefPicList1[i];
8428 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8429 obj_surface = encode_state->reference_objects[j];
8433 obj_surface->base.id == va_pic->picture_id) {
8435 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8436 avc_state->list_ref_idx[1][i] = j;
8443 return VA_STATUS_SUCCESS;
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448 struct encode_state *encode_state,
8449 struct intel_encoder_context *encoder_context)
8451 return VA_STATUS_SUCCESS;
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456 struct encode_state *encode_state,
8457 struct intel_encoder_context *encoder_context)
8460 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8464 /*set this flag when all kernel is finished*/
8465 if (generic_state->brc_enabled) {
8466 generic_state->brc_inited = 1;
8467 generic_state->brc_need_reset = 0;
8468 avc_state->mbenc_curbe_set_in_brc_update = 0;
8470 return VA_STATUS_SUCCESS;
/*
 * Dispatch the VME kernel sequence for one frame, in dependency order:
 * BRC init/reset -> scaling -> HME (32x/16x/4x) -> SFD -> BRC frame/MB
 * update -> weighted prediction -> MbEnc. Returns VA_STATUS_SUCCESS.
 */
8474 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
8475 struct encode_state *encode_state,
8476 struct intel_encoder_context *encoder_context)
8478 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8479 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8480 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8481 int fei_enabled = encoder_context->fei_enabled;
8483 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8484 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
8487 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
8488 if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
8489 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
/* Downscale pyramid: each deeper level requires the previous one. */
8493 if (generic_state->hme_supported) {
8494 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8495 if (generic_state->b16xme_supported) {
8496 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8497 if (generic_state->b32xme_supported) {
8498 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
/* HME runs coarsest-first: 32x seeds 16x, which seeds 4x. */
8504 if (generic_state->hme_enabled) {
8505 if (generic_state->b16xme_enabled) {
8506 if (generic_state->b32xme_enabled) {
8507 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
8509 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8511 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8514 /*call SFD kernel after HME in same command buffer*/
/* Static-frame detection only makes sense with HME results and without
 * per-MB SFD mode. */
8515 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
8516 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
8518 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
8521 /* BRC and MbEnc are included in the same task phase*/
8522 if (generic_state->brc_enabled) {
/* Optional I-frame distortion pre-pass feeds the BRC frame update. */
8523 if (avc_state->mbenc_i_frame_dist_in_use) {
8524 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
8526 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
8528 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
8529 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
8533 /*weight prediction,disable by now */
8534 avc_state->weighted_ref_l0_enable = 0;
8535 avc_state->weighted_ref_l1_enable = 0;
/* Run the WP kernel per list when explicit weights are signalled; otherwise
 * downgrade the picture flags (the app should really have done this). */
8536 if (avc_state->weighted_prediction_supported &&
8537 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
8538 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
8539 if (slice_param->luma_weight_l0_flag & 1) {
8540 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
8542 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
8543 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
8546 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
8547 if (slice_param->luma_weight_l1_flag & 1) {
8548 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
8549 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
8550 (slice_param->chroma_weight_l0_flag & 1) ||
8551 (slice_param->chroma_weight_l1_flag & 1))) {
8552 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
/* Final MbEnc pass (not the I-frame-distortion variant). */
8558 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
8560 /*ignore the reset vertical line kernel*/
8562 return VA_STATUS_SUCCESS;
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8568 struct encode_state *encode_state,
8569 struct intel_encoder_context *encoder_context)
8573 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8575 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576 if (va_status != VA_STATUS_SUCCESS)
8579 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580 if (va_status != VA_STATUS_SUCCESS)
8583 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584 if (va_status != VA_STATUS_SUCCESS)
8587 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588 if (va_status != VA_STATUS_SUCCESS)
8591 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592 if (va_status != VA_STATUS_SUCCESS)
8595 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8597 return VA_STATUS_SUCCESS;
8600 /* Update PreEnc specific parameters */
/*
 * Validate the application-supplied VAStatsStatisticsParameterH264 and bind
 * all its external buffers (MV predictors, per-MB QP, MV/statistics outputs,
 * past/future reference statistics) as GPE resources. Also derives frame
 * type, frame geometry and the 4x-downscaled geometry for this PreEnc pass.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_* on bad/undersized buffers.
 */
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8604 struct encode_state *encode_state,
8605 struct intel_encoder_context *encoder_context)
8607 struct i965_driver_data *i965 = i965_driver_data(ctx);
8608 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613 VAStatsStatisticsParameter *stat_param = NULL;
8614 struct object_buffer *obj_buffer = NULL;
8615 struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616 struct buffer_store *buffer_store = NULL;
8617 unsigned int size = 0, i = 0;
8618 unsigned int frame_mb_nums = 0;
/* PreEnc requires the stats parameter buffer to be present. */
8620 if (!encoder_context->preenc_enabled ||
8621 !encode_state->stat_param_ext ||
8622 !encode_state->stat_param_ext->buffer)
8623 return VA_STATUS_ERROR_OPERATION_FAILED;
8625 stat_param_h264 = avc_state->stat_param =
8626 (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627 stat_param = &stat_param_h264->stats_params;
8629 /* Assume the frame type based on number of past/future ref frames */
8630 if (!stat_param->num_past_references && !stat_param->num_future_references)
8631 generic_state->frame_type = SLICE_TYPE_I;
8632 else if (stat_param->num_future_references > 0)
8633 generic_state->frame_type = SLICE_TYPE_B;
8635 generic_state->frame_type = SLICE_TYPE_P;
8637 generic_state->preset = INTEL_PRESET_RT_SPEED;
8638 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8640 /* frame width and height */
8641 generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642 generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643 generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644 generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8646 /* 4x downscaled width and height */
8647 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
8650 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8652 /* reset hme types for preenc */
/* HME only has references to search against on non-I frames. */
8653 if (generic_state->frame_type != SLICE_TYPE_I)
8654 generic_state->hme_enabled = 1;
8656 /* ensure frame width is not too small */
8657 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659 generic_state->downscaled_width_4x_in_mb =
8660 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8663 /* ensure frame height is not too small*/
8664 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666 generic_state->downscaled_height_4x_in_mb =
8667 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8670 /********** Ensure buffer object parameters ********/
/* Every external buffer below is size-checked against the per-MB record
 * size before being wrapped as a GPE resource. */
8671 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8673 /* mv predictor buffer */
8674 if (stat_param_h264->mv_predictor_ctrl) {
8675 if (stat_param->mv_predictor == VA_INVALID_ID)
8677 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678 obj_buffer = BUFFER(stat_param->mv_predictor);
8681 buffer_store = obj_buffer->buffer_store;
8682 if (buffer_store->bo->size < size)
8684 if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8685 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8686 i965_dri_object_to_buffer_gpe_resource(
8687 &avc_ctx->preproc_mv_predictor_buffer,
/* per-MB QP input buffer */
8692 if (stat_param_h264->mb_qp) {
8693 if (stat_param->qp == VA_INVALID_ID)
8695 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8696 obj_buffer = BUFFER(stat_param->qp);
8697 buffer_store = obj_buffer->buffer_store;
8698 if (buffer_store->bo->size < size)
8700 if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8701 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8702 i965_dri_object_to_buffer_gpe_resource(
8703 &avc_ctx->preproc_mb_qp_buffer,
8707 /* locate mv and stat buffer */
/* outputs[] may carry MV and statistics buffers in either order; sort
 * them out by buffer type. */
8708 if (!stat_param_h264->disable_mv_output ||
8709 !stat_param_h264->disable_statistics_output) {
8711 if (!stat_param->outputs)
8714 for (i = 0; i < 2 ; i++) {
8715 if (stat_param->outputs[i] != VA_INVALID_ID) {
8716 obj_buffer = BUFFER(stat_param->outputs[i]);
8717 switch (obj_buffer->type) {
8718 case VAStatsMVBufferType:
8719 obj_buffer_mv = obj_buffer;
8721 case VAStatsStatisticsBufferType:
8722 obj_buffer_stat = obj_buffer;
8728 if (!(!stat_param_h264->disable_mv_output &&
8729 !stat_param_h264->disable_statistics_output))
8733 /* mv data output buffer */
8734 if (!stat_param_h264->disable_mv_output && obj_buffer_mv) {
8735 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8736 buffer_store = obj_buffer_mv->buffer_store;
8737 if (buffer_store->bo->size < size)
8739 if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8740 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8741 i965_dri_object_to_buffer_gpe_resource(
8742 &avc_ctx->preproc_mv_data_out_buffer,
8745 /* statistics output buffer */
8746 if (!stat_param_h264->disable_statistics_output && obj_buffer_stat) {
8747 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8748 buffer_store = obj_buffer_stat->buffer_store;
8749 if (buffer_store->bo->size < size)
8751 if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8752 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8753 i965_dri_object_to_buffer_gpe_resource(
8754 &avc_ctx->preproc_stat_data_out_buffer,
8758 /* past ref stat out buffer */
8759 if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8760 stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8761 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8762 obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8763 buffer_store = obj_buffer->buffer_store;
8764 if (buffer_store->bo->size < size)
8766 if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8767 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8768 i965_dri_object_to_buffer_gpe_resource(
8769 &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8772 /* future ref stat out buffer */
/* NOTE(review): this guard tests num_past_references while handling the
 * *future* reference stat buffer — it looks like it should test
 * num_future_references (mirroring the past-ref block above); confirm
 * against the libva FEI API contract before changing. */
8773 if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8774 stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8775 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8776 obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8777 buffer_store = obj_buffer->buffer_store;
8778 if (buffer_store->bo->size < size)
8780 if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8781 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8782 i965_dri_object_to_buffer_gpe_resource(
8783 &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8786 return VA_STATUS_SUCCESS;
8789 return VA_STATUS_ERROR_INVALID_BUFFER;
8792 /* Allocate internal resources required for PreEnc */
/*
 * Allocate (or re-create) the driver-internal GPE buffers PreEnc uses each
 * frame: 4x MEMV data and distortion surfaces, the BRC distortion surface,
 * the MBBRC constant (FTQ LUT) buffer, the 4x-downscaled current/past/future
 * surfaces, and a dummy coded buffer backing the status segment.
 * Returns VA_STATUS_SUCCESS or VA_STATUS_ERROR_ALLOCATION_FAILED.
 */
8794 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8795 struct encode_state *encode_state,
8796 struct intel_encoder_context *encoder_context)
8798 struct i965_driver_data *i965 = i965_driver_data(ctx);
8799 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8800 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8801 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8802 unsigned int width = 0;
8803 unsigned int height = 0;
8804 unsigned int size = 0;
8805 int allocate_flag = 1;
8807 /* 4x MEMV data buffer */
/* Geometry derived from the 4x-downscaled MB counts; the extra x10
 * multiplier presumably reserves room for multiple MV streams — confirm. */
8808 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8809 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8810 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8811 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8812 &avc_ctx->s4x_memv_data_buffer,
8815 "4x MEMV data buffer");
8817 goto failed_allocation;
8818 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8820 /* Output DISTORTION surface from 4x ME */
8821 width = generic_state->downscaled_width_4x_in_mb * 8;
8822 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8823 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8824 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8825 &avc_ctx->s4x_memv_distortion_buffer,
8828 "4x MEMV distortion buffer");
8830 goto failed_allocation;
8831 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8833 /* output BRC DISTORTION surface from 4x ME */
8834 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8835 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8836 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8837 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8838 &avc_ctx->res_brc_dist_data_surface,
8841 "brc dist data buffer");
8843 goto failed_allocation;
8844 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8847 /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
/* 16 entries per QP across the full QP range. */
8848 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8849 size = 16 * AVC_QP_MAX * 4;
8850 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8851 &avc_ctx->res_mbbrc_const_data_buffer,
8852 ALIGN(size, 0x1000),
8853 "mbbrc const data buffer");
8855 goto failed_allocation;
8856 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8858 /* 4x downscaled surface */
/* The three scaled surfaces are created once and reused across frames. */
8859 if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8860 i965_CreateSurfaces(ctx,
8861 generic_state->frame_width_4x,
8862 generic_state->frame_height_4x,
8863 VA_RT_FORMAT_YUV420,
8865 &avc_ctx->preenc_scaled_4x_surface_id);
8866 avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8867 if (!avc_ctx->preenc_scaled_4x_surface_obj)
8868 goto failed_allocation;
8869 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8870 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8873 /* 4x downscaled past ref surface */
8874 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8875 i965_CreateSurfaces(ctx,
8876 generic_state->frame_width_4x,
8877 generic_state->frame_height_4x,
8878 VA_RT_FORMAT_YUV420,
8880 &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8881 avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8882 SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8883 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8884 goto failed_allocation;
8885 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8886 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8889 /* 4x downscaled future ref surface */
8890 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8891 i965_CreateSurfaces(ctx,
8892 generic_state->frame_width_4x,
8893 generic_state->frame_height_4x,
8894 VA_RT_FORMAT_YUV420,
8896 &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8897 avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8898 SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8899 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8900 goto failed_allocation;
8901 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8902 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8905 /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8906 * the generic AVC Encdoe codepath which allocate status buffer as extension
/* Worst-case NV12 frame size (w*h*1.5, expressed as *12/8) plus header. */
8908 if (!avc_ctx->status_buffer.bo) {
8910 generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8911 size += I965_CODEDBUFFER_HEADER_SIZE;
8913 avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8914 "Dummy Coded Buffer",
8918 return VA_STATUS_SUCCESS;
8921 return VA_STATUS_ERROR_ALLOCATION_FAILED;
8926 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8927 struct encode_state *encode_state,
8928 struct intel_encoder_context *encoder_context)
8930 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8931 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8932 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8933 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8934 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8936 /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8937 * to avoid repeated scaling of same surfaces */
8940 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8941 INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8942 if (stat_param->num_past_references > 0) {
8943 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8944 INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8946 if (stat_param->num_future_references > 0) {
8947 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8948 INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8952 if (generic_state->hme_enabled) {
8953 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8956 /* preproc kernel */
8957 if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8958 gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8961 return VA_STATUS_SUCCESS;
8965 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8967 struct encode_state *encode_state,
8968 struct intel_encoder_context *encoder_context)
8972 va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8973 if (va_status != VA_STATUS_SUCCESS)
8976 va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8977 if (va_status != VA_STATUS_SUCCESS)
8980 va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8981 if (va_status != VA_STATUS_SUCCESS)
8984 return VA_STATUS_SUCCESS;
/*
 * Tear down the VME side of the encoder context: destroy the GPE kernel
 * contexts and free the per-codec state blobs hanging off it.
 */
8988 gen9_avc_vme_context_destroy(void * context)
8990 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8991 struct generic_encoder_context *generic_ctx;
8992 struct i965_avc_encoder_context *avc_ctx;
8993 struct generic_enc_codec_state *generic_state;
8994 struct avc_enc_state *avc_state;
8999 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9000 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9001 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9002 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
/* Releases all kernel GPE contexts and their GPU resources. */
9004 gen9_avc_kernel_destroy(vme_context);
/* NOTE(review): only generic_state's free is visible here; the frees of
 * generic_ctx, avc_ctx, avc_state and vme_context itself are expected in
 * the surrounding lines — confirm nothing leaks. */
9008 free(generic_state);
/*
 * Gen8 kernel bootstrap: load the scaling/BRC/ME/MbEnc/SFD kernels and wire
 * the curbe-setup and surface-binding function pointers. Gen8 uses its own
 * 4x-scaling/ME/MbEnc/BRC-frame-update curbe variants; the rest is shared
 * with the gen9 path.
 */
9016 gen8_avc_kernel_init(VADriverContextP ctx,
9017 struct intel_encoder_context *encoder_context)
9019 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9020 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9021 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9022 int fei_enabled = encoder_context->fei_enabled;
/* FEI builds use a different kernel binary layout. */
9024 generic_ctx->get_kernel_header_and_size = fei_enabled ?
9025 intel_avc_fei_get_kernel_header_and_size :
9026 intel_avc_get_kernel_header_and_size ;
9027 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9028 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9029 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9030 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9031 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* Curbe-setup hooks (gen8-specific where they differ from gen9). */
9034 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9035 generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9036 generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9037 generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9038 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9039 generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9040 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
/* Surface-binding hooks (all shared with gen9). */
9042 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9043 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9044 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9045 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9046 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9047 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
/*
 * Gen9 kernel bootstrap with three mutually exclusive modes:
 *   - generic AVC encode (neither FEI nor PreEnc): full kernel set plus
 *     platform-specific 4x-scaling curbe selection;
 *   - FEI encode: only MbEnc, with FEI curbe/surface hooks;
 *   - PreEnc: scaling + ME + preproc kernels with PreEnc hooks.
 */
9050 gen9_avc_kernel_init(VADriverContextP ctx,
9051 struct intel_encoder_context *encoder_context)
9053 struct i965_driver_data *i965 = i965_driver_data(ctx);
9054 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9055 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9056 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9057 int fei_enabled = encoder_context->fei_enabled;
9058 int preenc_enabled = encoder_context->preenc_enabled;
/* FEI and PreEnc share the FEI kernel binary layout. */
9060 generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
9061 intel_avc_fei_get_kernel_header_and_size :
9062 intel_avc_get_kernel_header_and_size ;
9064 if (!fei_enabled && !preenc_enabled) {
9065 /* generic AVC Encoder */
9066 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9067 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9068 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9069 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9070 encoder_context->fei_enabled);
9071 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
9072 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* Curbe-setup hooks. */
9075 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9076 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9077 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
9078 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
9079 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9080 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
9081 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
9082 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9083 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
/* Surface-binding hooks. */
9085 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9086 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9087 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9088 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9089 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9090 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
9091 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9092 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
/* Platform-specific 4x-scaling curbe: SKL/BXT use the gen9 variant,
 * KBL/GLK/GEN10 use the gen9.5 variant. */
9094 if (IS_SKL(i965->intel.device_info) ||
9095 IS_BXT(i965->intel.device_info))
9096 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9097 else if (IS_KBL(i965->intel.device_info) ||
9098 IS_GEN10(i965->intel.device_info) ||
9099 IS_GLK(i965->intel.device_info))
9100 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9102 } else if (fei_enabled) {
9103 /* FEI AVC Encoding */
9104 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9105 encoder_context->fei_enabled);
9106 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
9107 generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
9110 /* PreEnc for AVC */
9111 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
9112 encoder_context->preenc_enabled);
9113 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
9114 encoder_context->preenc_enabled);
9115 gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
9117 /* preenc 4x scaling uses the gen95 kernel */
9118 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9119 generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
9120 generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
9122 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9123 generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
9124 generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
9129 PAK pipeline related function
9132 intel_avc_enc_slice_type_fixup(int slice_type);
9134 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
/*
 * Allocates the GPE buffers the PAK stage needs when no VME pass ran:
 *   - a second-level batch buffer that receives the MFX image state, and
 *   - (once per context) the BRC pre-PAK statistics output buffer.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED when
 * any i965_allocate_gpe_resource() call fails.
 */
9136 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9137 struct encode_state *encode_state,
9138 struct intel_encoder_context *encoder_context)
9140 struct i965_driver_data *i965 = i965_driver_data(ctx);
9141 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9142 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9143 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9144 unsigned int size = 0;
9145 int allocate_flag = 1;
9147 /*second level batch buffer for image state write when cqp etc*/
/* Free any previous buffer first so re-allocation on re-init does not leak. */
9148 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9149 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9150 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9151 &avc_ctx->res_image_state_batch_buffer_2nd_level,
9152 ALIGN(size, 0x1000),
9153 "second level batch (image state write) buffer");
9155 goto failed_allocation;
/* BRC statistics buffer is shared across passes; allocate it only once. */
9157 if (!generic_state->brc_allocated) {
9158 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9160 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9161 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9162 ALIGN(size, 0x1000),
9163 "brc pak statistic buffer");
9165 goto failed_allocation;
9168 return VA_STATUS_SUCCESS;
9171 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Emits MFX_PIPE_MODE_SELECT (5 DWs) into the BCS batch, configuring the
 * MFX engine for AVC encoding: long-format commands, encode mode, and
 * pre/post deblocking outputs enabled according to which output buffer
 * actually has a BO. Stream-out is enabled on every PAK pass except the
 * last one (multi-pass BRC reads the stream-out of earlier passes).
 */
9175 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
9176 struct encode_state *encode_state,
9177 struct intel_encoder_context *encoder_context)
9179 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9180 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9181 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9182 struct intel_batchbuffer *batch = encoder_context->base.batch;
9184 BEGIN_BCS_BATCH(batch, 5);
9186 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
9187 OUT_BCS_BATCH(batch,
9189 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
9190 (MFD_MODE_VLD << 15) |
9191 (0 << 13) | /* Non-VDEnc mode is 0*/
9192 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
9193 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
9194 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
9195 (0 << 7) | /* Scaled surface enable */
9196 (0 << 6) | /* Frame statistics stream out enable */
9197 (0 << 5) | /* not in stitch mode */
9198 (1 << 4) | /* encoding mode */
9199 (MFX_FORMAT_AVC << 0));
9200 OUT_BCS_BATCH(batch,
9201 (0 << 7) | /* expand NOA bus flag */
9202 (0 << 6) | /* disable slice-level clock gating */
9203 (0 << 5) | /* disable clock gating for NOA */
9204 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
9205 (0 << 3) | /* terminate if AVC mbdata error occurs */
9206 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3/DW4 are reserved for encode and left zero. */
9209 OUT_BCS_BATCH(batch, 0);
9210 OUT_BCS_BATCH(batch, 0);
9212 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_SURFACE_STATE (6 DWs) describing one NV12 (planar 4:2:0,
 * interleaved U/V, Y-major tiled) surface to the MFX engine.
 * 'gpe_resource' supplies width/height/pitch and the chroma plane offset;
 * 'id' selects which MFX surface slot this state targets.
 */
9216 gen9_mfc_avc_surface_state(VADriverContextP ctx,
9217 struct intel_encoder_context *encoder_context,
9218 struct i965_gpe_resource *gpe_resource,
9221 struct intel_batchbuffer *batch = encoder_context->base.batch;
9223 BEGIN_BCS_BATCH(batch, 6);
9225 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
9226 OUT_BCS_BATCH(batch, id);
9227 OUT_BCS_BATCH(batch,
9228 ((gpe_resource->height - 1) << 18) |
9229 ((gpe_resource->width - 1) << 4));
9230 OUT_BCS_BATCH(batch,
9231 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
9232 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
9233 ((gpe_resource->pitch - 1) << 3) | /* pitch */
9234 (0 << 2) | /* must be 0 for interleave U/V */
9235 (1 << 1) | /* must be tiled */
9236 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
9237 OUT_BCS_BATCH(batch,
9238 (0 << 16) | /* must be 0 for interleave U/V */
9239 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
/* NOTE(review): DW5 (Cr offset) reuses y_cb_offset — for interleaved-UV
 * NV12 the Cb and Cr offsets are the same plane; confirm against PRM. */
9240 OUT_BCS_BATCH(batch,
9241 (0 << 16) | /* must be 0 for interleave U/V */
9242 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9244 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_PIPE_BUF_ADDR_STATE, programming every pipeline buffer address
 * for PAK: pre/post deblocking outputs, raw input surface, PAK MB-status
 * buffer (both write and read slots), row-store scratch buffers, and the
 * reference picture list. Gen10 takes a longer command (3 extra DWs,
 * emitted as zero at the end).
 */
9248 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9250 struct i965_driver_data *i965 = i965_driver_data(ctx);
9251 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9252 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9253 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9254 struct intel_batchbuffer *batch = encoder_context->base.batch;
9256 unsigned int cmd_len = 65;
/* Gen10 extends this command; cmd_len is bumped here (raised value elided
 * in this view — presumably 68, matching the 3 trailing zero DWs below). */
9258 if (IS_GEN10(i965->intel.device_info))
9261 BEGIN_BCS_BATCH(batch, cmd_len);
9263 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));
9265 /* the DW1-3 is for pre_deblocking */
9266 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9268 /* the DW4-6 is for the post_deblocking */
9269 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9271 /* the DW7-9 is for the uncompressed_picture */
9272 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
9274 /* the DW10-12 is for PAK information (write) */
9275 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
9277 /* the DW13-15 is for the intra_row_store_scratch */
9278 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9280 /* the DW16-18 is for the deblocking filter */
9281 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9283 /* the DW 19-50 is for Reference pictures*/
9284 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
9285 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
9288 /* DW 51, reference picture attributes */
9289 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9291 /* The DW 52-54 is for PAK information (read) */
9292 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
9294 /* the DW 55-57 is the ILDB buffer */
9295 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9297 /* the DW 58-60 is the second ILDB buffer */
9298 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9300 /* DW 61, memory compress enable & mode */
9301 OUT_BCS_BATCH(batch, 0);
9303 /* the DW 62-64 is the buffer */
9304 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
/* Gen10-only trailing DWs, left zero. */
9307 if (IS_GEN10(i965->intel.device_info)) {
9308 OUT_BCS_BATCH(batch, 0);
9309 OUT_BCS_BATCH(batch, 0);
9310 OUT_BCS_BATCH(batch, 0);
9313 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): programs the indirect MV
 * object base (the reconstructed surface's MV data buffer, sized at
 * 32 DWs * 4 bytes per MB) and the PAK-BSE base/upper-bound for the
 * compressed bitstream output. IT-COFF and DBLK indirect objects are
 * unused on the encode path and programmed NULL.
 */
9317 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
9318 struct encode_state *encode_state,
9319 struct intel_encoder_context *encoder_context)
9321 struct i965_driver_data *i965 = i965_driver_data(ctx);
9322 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9323 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9324 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9325 struct intel_batchbuffer *batch = encoder_context->base.batch;
9326 struct object_surface *obj_surface;
9327 struct gen9_surface_avc *avc_priv_surface;
9328 unsigned int size = 0;
9329 unsigned int w_mb = generic_state->frame_width_in_mbs;
9330 unsigned int h_mb = generic_state->frame_height_in_mbs;
9332 obj_surface = encode_state->reconstructed_object;
/* Bail out early when no reconstructed surface (or its AVC private data)
 * exists — nothing can be programmed without the MV buffer. */
9334 if (!obj_surface || !obj_surface->private_data)
9336 avc_priv_surface = obj_surface->private_data;
9338 BEGIN_BCS_BATCH(batch, 26);
9340 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
9341 /* The DW1-5 is for the MFX indirect bistream offset */
9342 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9343 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9345 /* the DW6-10 is for MFX Indirect MV Object Base Address */
9346 size = w_mb * h_mb * 32 * 4;
9347 OUT_BUFFER_3DW(batch,
9348 avc_priv_surface->res_mv_data_surface.bo,
9351 i965->intel.mocs_state);
9352 OUT_BUFFER_2DW(batch,
9353 avc_priv_surface->res_mv_data_surface.bo,
9355 ALIGN(size, 0x1000));
9357 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
9358 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9359 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9361 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
9362 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9363 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9365 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
9366 * Note: an offset is specified in MFX_AVC_SLICE_STATE
9368 OUT_BUFFER_3DW(batch,
9369 generic_ctx->compressed_bitstream.res.bo,
9372 i965->intel.mocs_state);
9373 OUT_BUFFER_2DW(batch,
9374 generic_ctx->compressed_bitstream.res.bo,
9376 generic_ctx->compressed_bitstream.end_offset);
9378 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs). Only the BSD/MPC row-store
 * scratch buffer is needed on the encode path; the MPR row-store and
 * bitplane-read buffers are decode-only and programmed NULL.
 */
9382 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9384 struct i965_driver_data *i965 = i965_driver_data(ctx);
9385 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9386 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9387 struct intel_batchbuffer *batch = encoder_context->base.batch;
9389 BEGIN_BCS_BATCH(batch, 10);
9391 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9393 /* The DW1-3 is for bsd/mpc row store scratch buffer */
9394 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9396 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9397 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9399 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9400 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9402 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-MV buffers for the
 * reference frames, the MV write buffer for the current frame, and the
 * POC (picture order count) list used for temporal direct-mode scaling.
 */
9406 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
9407 struct intel_encoder_context *encoder_context)
9409 struct i965_driver_data *i965 = i965_driver_data(ctx);
9410 struct intel_batchbuffer *batch = encoder_context->base.batch;
9411 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9412 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9413 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9417 BEGIN_BCS_BATCH(batch, 71);
9419 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
9421 /* Reference frames and Current frames */
9422 /* the DW1-32 is for the direct MV for reference */
/* Each reference takes a 64-bit relocation (2 DWs); unused slots are
 * filled with two zero DWs so the command stays fixed-length. */
9423 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
9424 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
9425 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
9426 I915_GEM_DOMAIN_INSTRUCTION, 0,
9429 OUT_BCS_BATCH(batch, 0);
9430 OUT_BCS_BATCH(batch, 0);
/* DW33: memory attributes (MOCS) for the reference MV buffers. */
9434 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9436 /* the DW34-36 is the MV for the current frame */
9437 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
9438 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
9441 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: reference POCs followed by the current frame's POCs. */
9444 for (i = 0; i < 32; i++) {
9445 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
9447 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
9448 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
9450 ADVANCE_BCS_BATCH(batch);
/*
 * Emits one MFX_QM_STATE command (18 DWs) carrying a quantizer-matrix of
 * up to 16 DWs; 'qm' is copied into a zero-padded 16-DW staging buffer
 * so short matrices (e.g. the 12-DW 4x4 lists) are padded with zeros.
 */
9454 gen9_mfc_qm_state(VADriverContextP ctx,
9456 const unsigned int *qm,
9458 struct intel_encoder_context *encoder_context)
9460 struct intel_batchbuffer *batch = encoder_context->base.batch;
9461 unsigned int qm_buffer[16];
9463 assert(qm_length <= 16);
9464 assert(sizeof(*qm) == 4);
9465 memset(qm_buffer, 0, 16 * 4);
9466 memcpy(qm_buffer, qm, qm_length * 4);
9468 BEGIN_BCS_BATCH(batch, 18);
9469 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9470 OUT_BCS_BATCH(batch, qm_type << 0);
9471 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9472 ADVANCE_BCS_BATCH(batch);
/*
 * Programs the four AVC quantizer matrices (4x4 intra/inter, 8x8
 * intra/inter). When neither the SPS nor the PPS signals a scaling
 * matrix, flat default matrices are used; otherwise the lists come
 * from the application's VAIQMatrixBufferH264.
 */
9476 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9477 struct encode_state *encode_state,
9478 struct intel_encoder_context *encoder_context)
9480 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9481 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9482 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9483 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9486 const unsigned int *qm_4x4_intra;
9487 const unsigned int *qm_4x4_inter;
9488 const unsigned int *qm_8x8_intra;
9489 const unsigned int *qm_8x8_inter;
9491 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9492 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9493 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9495 VAIQMatrixBufferH264 *qm;
9496 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9497 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* List 0 = intra Y 4x4, list 3 = inter Y 4x4; 8x8 lists 0/1 = intra/inter Y. */
9498 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9499 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9500 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9501 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 matrices are 12 DWs (3 lists x 16 bytes); 8x8 are 16 DWs. */
9504 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9505 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9506 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9507 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/*
 * Emits one MFX_FQM_STATE command (34 DWs) carrying a forward-quantizer
 * matrix of up to 32 DWs, zero-padded like gen9_mfc_qm_state().
 */
9511 gen9_mfc_fqm_state(VADriverContextP ctx,
9513 const unsigned int *fqm,
9515 struct intel_encoder_context *encoder_context)
9517 struct intel_batchbuffer *batch = encoder_context->base.batch;
9518 unsigned int fqm_buffer[32];
9520 assert(fqm_length <= 32);
9521 assert(sizeof(*fqm) == 4);
9522 memset(fqm_buffer, 0, 32 * 4);
9523 memcpy(fqm_buffer, fqm, fqm_length * 4);
9525 BEGIN_BCS_BATCH(batch, 34);
9526 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9527 OUT_BCS_BATCH(batch, fqm_type << 0);
9528 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9529 ADVANCE_BCS_BATCH(batch);
/*
 * Converts a len x len quantizer matrix 'qm' into the transposed
 * fixed-point forward matrix 'fqm': fqm[i][j] = 65536 / qm[j][i].
 * Every qm entry must be non-zero (asserted) to avoid division by zero.
 */
9533 gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
9536 for (i = 0; i < len; i++)
9537 for (j = 0; j < len; j++) {
9538 assert(qm[j * len + i]);
9539 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/*
 * Programs the four AVC forward-quantizer matrices. With no scaling
 * matrix signalled in SPS/PPS, flat precomputed FQMs are emitted;
 * otherwise each application scaling list is converted on the fly via
 * gen9_mfc_fill_fqm() into a shared staging buffer ('fqm', declared in
 * the elided part of this function) and emitted.
 */
9544 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
9545 struct encode_state *encode_state,
9546 struct intel_encoder_context *encoder_context)
9548 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9549 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9550 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9551 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9553 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9554 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
/* 4x4 FQMs are 24 DWs (3 lists x 16 uint16), 8x8 FQMs are 32 DWs. */
9555 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
9556 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
9557 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
9558 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
9562 VAIQMatrixBufferH264 *qm;
9563 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9564 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Lists 0-2: intra Y/Cb/Cr 4x4. */
9566 for (i = 0; i < 3; i++)
9567 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
9568 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* Lists 3-5: inter Y/Cb/Cr 4x4. */
9570 for (i = 3; i < 6; i++)
9571 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
9572 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
9574 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
9575 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
9577 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
9578 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/*
 * Emits an MFX_INSERT_OBJECT command placing raw header/payload bits
 * into the output bitstream.
 *   insert_data / lenght_in_dws : payload and its length in DWs
 *     (NOTE(review): "lenght" is a pre-existing spelling of "length"
 *      in the parameter name, kept for consistency with callers)
 *   data_bits_in_last_dw        : valid bits in the final DW; 0 means
 *                                 the whole DW (normalized to 32 below)
 *   skip_emul_byte_count        : leading bytes exempt from emulation-
 *                                 prevention processing (start code/NAL)
 *   emulation_flag              : let HW insert emulation-prevention bytes
 *   slice_header_indicator      : marks the data as a slice header so the
 *                                 HW treats it accordingly
 */
9583 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9584 struct intel_encoder_context *encoder_context,
9585 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9586 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9587 int slice_header_indicator,
9588 struct intel_batchbuffer *batch)
9590 if (data_bits_in_last_dw == 0)
9591 data_bits_in_last_dw = 32;
9593 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9595 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9596 OUT_BCS_BATCH(batch,
9597 (0 << 16) | /* always start at offset 0 */
9598 (slice_header_indicator << 14) |
9599 (data_bits_in_last_dw << 8) |
9600 (skip_emul_byte_count << 4) |
9601 (!!emulation_flag << 3) |
9602 ((!!is_last_header) << 2) |
9603 ((!!is_end_of_slice) << 1) |
9604 (0 << 0)); /* check this flag */
9605 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9607 ADVANCE_BCS_BATCH(batch);
/*
 * Scans the packed raw-data headers attached to slice 0 and inserts any
 * Access Unit Delimiter (AUD) NAL it finds into the bitstream, so the
 * AUD is emitted before all other headers. The NAL type is read from the
 * byte just past the start code (located via the skip-emulation count).
 */
9611 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
9612 struct encode_state *encode_state,
9613 struct intel_encoder_context *encoder_context,
9614 struct intel_batchbuffer *batch)
9616 VAEncPackedHeaderParameterBuffer *param = NULL;
9617 unsigned int length_in_bits;
9618 unsigned int *header_data = NULL;
9619 unsigned char *nal_type = NULL;
9620 int count, i, start_index;
9622 count = encode_state->slice_rawdata_count[0];
9623 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
9625 for (i = 0; i < count; i++) {
9626 unsigned int skip_emul_byte_cnt;
9628 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9629 nal_type = (unsigned char *)header_data;
9631 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
/* Only raw-data packed buffers can carry an AUD; skip other types. */
9632 if (param->type != VAEncPackedHeaderRawData)
9635 length_in_bits = param->bit_length;
9637 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* NAL unit type lives in the low 5 bits of the first post-start-code byte. */
9639 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
9640 gen9_mfc_avc_insert_object(ctx,
9643 ALIGN(length_in_bits, 32) >> 5,
9644 length_in_bits & 0x1f,
9648 !param->has_emulation_bytes,
/*
 * Inserts the packed data associated with one slice into the bitstream:
 * first every raw-data buffer attached to the slice (AUDs are skipped —
 * they were inserted earlier — and packed slice headers are deferred),
 * then the slice header itself. If the application supplied no packed
 * slice header (slice_header_index == -1), the driver builds one with
 * build_avc_slice_header(); otherwise the application's packed header
 * is inserted last with the last-header flag set.
 */
9657 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
9658 struct encode_state *encode_state,
9659 struct intel_encoder_context *encoder_context,
9661 struct intel_batchbuffer *batch)
9663 VAEncPackedHeaderParameterBuffer *param = NULL;
9664 unsigned int length_in_bits;
9665 unsigned int *header_data = NULL;
9666 int count, i, start_index;
9667 int slice_header_index;
9668 unsigned char *nal_type = NULL;
/* Index 0 means "no packed slice header supplied by the app". */
9670 if (encode_state->slice_header_index[slice_index] == 0)
9671 slice_header_index = -1;
9673 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9675 count = encode_state->slice_rawdata_count[slice_index];
9676 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9678 for (i = 0; i < count; i++) {
9679 unsigned int skip_emul_byte_cnt;
9681 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9682 nal_type = (unsigned char *)header_data;
9684 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
9686 length_in_bits = param->bit_length;
9688 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9690 /* skip the slice header packed data type as it is lastly inserted */
9691 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
9694 /* as the slice header is still required, the last header flag is set to
9697 gen9_mfc_avc_insert_object(ctx,
9700 ALIGN(length_in_bits, 32) >> 5,
9701 length_in_bits & 0x1f,
9705 !param->has_emulation_bytes,
9710 if (slice_header_index == -1) {
9711 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
9712 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
9713 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
9714 unsigned char *slice_header = NULL;
9715 int slice_header_length_in_bits = 0;
9717 /* No slice header data is passed. And the driver needs to generate it */
9718 /* For the Normal H264 */
9719 slice_header_length_in_bits = build_avc_slice_header(seq_param,
9723 gen9_mfc_avc_insert_object(ctx,
9725 (unsigned int *)slice_header,
9726 ALIGN(slice_header_length_in_bits, 32) >> 5,
9727 slice_header_length_in_bits & 0x1f,
9728 5, /* first 5 bytes are start code + nal unit type */
9735 unsigned int skip_emul_byte_cnt;
9737 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
9739 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
9740 length_in_bits = param->bit_length;
9742 /* as the slice header is the last header data for one slice,
9743 * the last header flag is set to one.
9745 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9747 gen9_mfc_avc_insert_object(ctx,
9750 ALIGN(length_in_bits, 32) >> 5,
9751 length_in_bits & 0x1f,
9755 !param->has_emulation_bytes,
/*
 * Inserts all stream headers for one slice ("inset" is a pre-existing
 * typo of "insert" in this public function name; kept since callers
 * reference it). For the first slice this emits, in order: the AUD (if
 * packed as raw data), then packed SPS, PPS, and SEI buffers. Finally
 * it delegates to gen9_mfc_avc_insert_slice_packed_data() for the
 * per-slice raw data and slice header.
 */
9764 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9765 struct encode_state *encode_state,
9766 struct intel_encoder_context *encoder_context,
9767 VAEncSliceParameterBufferH264 *slice_param,
9769 struct intel_batchbuffer *batch)
9771 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9772 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9773 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9774 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9775 unsigned int skip_emul_byte_cnt;
/* Sequence-level headers (AUD/SPS/PPS/SEI) only precede the first slice. */
9777 if (slice_index == 0) {
9779 /* if AUD exist and insert it firstly */
9780 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9782 if (encode_state->packed_header_data[idx]) {
9783 VAEncPackedHeaderParameterBuffer *param = NULL;
9784 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9785 unsigned int length_in_bits;
9787 assert(encode_state->packed_header_param[idx]);
9788 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9789 length_in_bits = param->bit_length;
9791 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9792 gen9_mfc_avc_insert_object(ctx,
9795 ALIGN(length_in_bits, 32) >> 5,
9796 length_in_bits & 0x1f,
9800 !param->has_emulation_bytes,
/* PPS follows the SPS. */
9805 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9807 if (encode_state->packed_header_data[idx]) {
9808 VAEncPackedHeaderParameterBuffer *param = NULL;
9809 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9810 unsigned int length_in_bits;
9812 assert(encode_state->packed_header_param[idx]);
9813 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9814 length_in_bits = param->bit_length;
9816 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9818 gen9_mfc_avc_insert_object(ctx,
9821 ALIGN(length_in_bits, 32) >> 5,
9822 length_in_bits & 0x1f,
9826 !param->has_emulation_bytes,
/* SEI last among sequence-level headers. */
9831 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9833 if (encode_state->packed_header_data[idx]) {
9834 VAEncPackedHeaderParameterBuffer *param = NULL;
9835 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9836 unsigned int length_in_bits;
9838 assert(encode_state->packed_header_param[idx]);
9839 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9840 length_in_bits = param->bit_length;
9842 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9843 gen9_mfc_avc_insert_object(ctx,
9846 ALIGN(length_in_bits, 32) >> 5,
9847 length_in_bits & 0x1f,
9851 !param->has_emulation_bytes,
9854 } else if (internal_rate_mode == VA_RC_CBR) {
9859 gen9_mfc_avc_insert_slice_packed_data(ctx,
/*
 * Emits MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, weighted-prediction config, slice geometry (current
 * and next slice MB positions), rate-control flags (counters only on
 * re-passes, panic only on the final BRC pass), the bitstream output
 * offset, QP clamp values and rounding/"correct" controls.
 * next_slice_param == NULL marks the last slice of the frame.
 */
9867 gen9_mfc_avc_slice_state(VADriverContextP ctx,
9868 struct encode_state *encode_state,
9869 struct intel_encoder_context *encoder_context,
9870 VAEncPictureParameterBufferH264 *pic_param,
9871 VAEncSliceParameterBufferH264 *slice_param,
9872 VAEncSliceParameterBufferH264 *next_slice_param,
9873 struct intel_batchbuffer *batch)
9875 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9876 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9877 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9878 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9879 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
9880 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
9881 unsigned char correct[6], grow, shrink;
9882 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
9883 int max_qp_n, max_qp_p;
9885 int weighted_pred_idc = 0;
9886 int num_ref_l0 = 0, num_ref_l1 = 0;
9887 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
9888 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
9889 unsigned int rc_panic_enable = 0;
9890 unsigned int rate_control_counter_enable = 0;
9891 unsigned int rounding_value = 0;
9892 unsigned int rounding_inter_enable = 0;
/* Translate the slice's first MB address into (x, y) MB coordinates. */
9894 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9895 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
9897 if (next_slice_param) {
9898 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9899 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice: "next" position is the first MB past the frame bottom. */
9901 next_slice_hor_pos = 0;
9902 next_slice_ver_pos = generic_state->frame_height_in_mbs;
/* Per-slice-type reference counts, rounding, and weighted prediction. */
9905 if (slice_type == SLICE_TYPE_I) {
9906 luma_log2_weight_denom = 0;
9907 chroma_log2_weight_denom = 0;
9908 } else if (slice_type == SLICE_TYPE_P) {
9909 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
9910 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9911 rounding_inter_enable = avc_state->rounding_inter_enable;
9912 rounding_value = avc_state->rounding_value;
9914 if (slice_param->num_ref_idx_active_override_flag)
9915 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9916 } else if (slice_type == SLICE_TYPE_B) {
9917 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
9918 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9919 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
9920 rounding_inter_enable = avc_state->rounding_inter_enable;
9921 rounding_value = avc_state->rounding_value;
9923 if (slice_param->num_ref_idx_active_override_flag) {
9924 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9925 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
9928 if (weighted_pred_idc == 2) {
9929 /* 8.4.3 - Derivation process for prediction weights (8-279) */
9930 luma_log2_weight_denom = 5;
9931 chroma_log2_weight_denom = 5;
/* RC counters only apply on BRC re-passes; panic mode only when BRC is
 * active, min/max QP clamping is off, and this is the final PAK pass. */
9940 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
9941 rc_panic_enable = (avc_state->rc_panic_enable &&
9942 (!avc_state->min_max_qp_enable) &&
9943 (encoder_context->rate_control_mode != VA_RC_CQP) &&
9944 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
9946 for (i = 0; i < 6; i++)
9949 BEGIN_BCS_BATCH(batch, 11);
9951 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
9952 OUT_BCS_BATCH(batch, slice_type);
9953 OUT_BCS_BATCH(batch,
9954 (num_ref_l1 << 24) |
9955 (num_ref_l0 << 16) |
9956 (chroma_log2_weight_denom << 8) |
9957 (luma_log2_weight_denom << 0));
9958 OUT_BCS_BATCH(batch,
9959 (weighted_pred_idc << 30) |
9960 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
9961 (slice_param->disable_deblocking_filter_idc << 27) |
9962 (slice_param->cabac_init_idc << 24) |
9964 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
9965 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
9967 OUT_BCS_BATCH(batch,
9968 slice_ver_pos << 24 |
9969 slice_hor_pos << 16 |
9970 slice_param->macroblock_address);
9971 OUT_BCS_BATCH(batch,
9972 next_slice_ver_pos << 16 |
9973 next_slice_hor_pos);
9975 OUT_BCS_BATCH(batch,
9976 (rate_control_counter_enable << 31) |
9977 (1 << 30) | /* ResetRateControlCounter */
9978 (2 << 28) | /* Loose Rate Control */
9979 (0 << 24) | /* RC Stable Tolerance */
9980 (rc_panic_enable << 23) | /* RC Panic Enable */
9981 (1 << 22) | /* CBP mode */
9982 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
9983 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
9984 (!next_slice_param << 19) | /* Is Last Slice */
9985 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
9986 (1 << 17) | /* HeaderPresentFlag */
9987 (1 << 16) | /* SliceData PresentFlag */
9988 (0 << 15) | /* TailPresentFlag */
9989 (1 << 13) | /* RBSP NAL TYPE */
9990 (1 << 12)); /* CabacZeroWordInsertionEnable */
/* DW7: byte offset of this slice's output within the bitstream buffer
 * (pairs with the PAK-BSE base set in MFX_IND_OBJ_BASE_ADDR_STATE). */
9992 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
9994 OUT_BCS_BATCH(batch,
9995 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
9996 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
9999 OUT_BCS_BATCH(batch,
10000 (rounding_inter_enable << 31) |
10001 (rounding_value << 28) |
10004 (correct[5] << 20) |
10005 (correct[4] << 16) |
10006 (correct[3] << 12) |
10007 (correct[2] << 8) |
10008 (correct[1] << 4) |
10009 (correct[0] << 0));
10010 OUT_BCS_BATCH(batch, 0);
10012 ADVANCE_BCS_BATCH(batch);
/*
 * Pack the one-byte reference entry consumed by MFX_AVC_REF_IDX_STATE.
 * Visible layout: bit 6 = long-term reference flag, bits starting at
 * bit 1 = frame store id, bit 0 set only for a bottom-field-only picture.
 * NOTE(review): the original-file numbering shows a line (10026) missing
 * between the long-term and frame_store_id terms — presumably a field
 * parity term at bit 5; confirm against the complete file.
 */
10016 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
      /* Normalize each VA picture flag to 0/1 with the !! idiom. */
10018 unsigned int is_long_term =
10019 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10020 unsigned int is_top_field =
10021 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10022 unsigned int is_bottom_field =
10023 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
      /* Bit 0: set when the reference is a bottom field and not a top field. */
10025 return ((is_long_term << 6) |
10027 (frame_store_id << 1) |
10028 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Program MFX_AVC_REF_IDX_STATE for the current slice: one 10-dword
 * packet for list L0 (P and B slices) and one for list L1 (B slices).
 * Unused entries keep the 0x80808080 "invalid" pattern.
 */
10032 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10033 struct encode_state *encode_state,
10034 struct intel_encoder_context *encoder_context,
10035 VAEncSliceParameterBufferH264 *slice_param,
10036 struct intel_batchbuffer *batch)
10038 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10039 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10040 VAPictureH264 *ref_pic;
10041 int i, slice_type, ref_idx_shift;
10042 unsigned int fwd_ref_entry;
10043 unsigned int bwd_ref_entry;
10045 /* max 4 ref frames are allowed for l0 and l1 */
10046 fwd_ref_entry = 0x80808080;
10047 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
      /* Fill one packed byte per L0 reference (up to 4) into fwd_ref_entry. */
10049 if ((slice_type == SLICE_TYPE_P) ||
10050 (slice_type == SLICE_TYPE_B)) {
10051 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10052 ref_pic = &slice_param->RefPicList0[i];
10053 ref_idx_shift = i * 8;
      /* Clear the placeholder byte, then insert the packed ref entry. */
10055 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10056 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
      /* Same packing for list L1, used only by B slices. */
10060 bwd_ref_entry = 0x80808080;
10061 if (slice_type == SLICE_TYPE_B) {
10062 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10063 ref_pic = &slice_param->RefPicList1[i];
10064 ref_idx_shift = i * 8;
10066 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10067 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
      /* Emit the L0 packet: dword1 selects list 0, dword2 carries the
       * 4 packed entries, remaining 7 dwords stay invalid (0x80808080). */
10071 if ((slice_type == SLICE_TYPE_P) ||
10072 (slice_type == SLICE_TYPE_B)) {
10073 BEGIN_BCS_BATCH(batch, 10);
10074 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10075 OUT_BCS_BATCH(batch, 0); // L0
10076 OUT_BCS_BATCH(batch, fwd_ref_entry);
10078 for (i = 0; i < 7; i++) {
10079 OUT_BCS_BATCH(batch, 0x80808080);
10082 ADVANCE_BCS_BATCH(batch);
      /* Emit the L1 packet for B slices. */
10085 if (slice_type == SLICE_TYPE_B) {
10086 BEGIN_BCS_BATCH(batch, 10);
10087 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10088 OUT_BCS_BATCH(batch, 1); //Select L1
10089 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
10090 for (i = 0; i < 7; i++) {
10091 OUT_BCS_BATCH(batch, 0x80808080);
10093 ADVANCE_BCS_BATCH(batch);
/*
 * Program MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction
 * is enabled: one 98-dword packet per reference list. Each of the 32
 * entries holds 6 shorts: luma weight/offset then chroma Cb and Cr
 * weight/offset pairs, copied straight from the VA slice parameters.
 */
10098 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10099 struct encode_state *encode_state,
10100 struct intel_encoder_context *encoder_context,
10101 VAEncPictureParameterBufferH264 *pic_param,
10102 VAEncSliceParameterBufferH264 *slice_param,
10103 struct intel_batchbuffer *batch)
10106 short weightoffsets[32 * 6];
10108 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
      /* P slice with weighted_pred_flag: emit L0 weights/offsets only. */
10110 if (slice_type == SLICE_TYPE_P &&
10111 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10112 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10113 for (i = 0; i < 32; i++) {
10114 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10115 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10116 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10117 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10118 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10119 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
      /* Dword1 = 0 selects list L0. */
10122 BEGIN_BCS_BATCH(batch, 98);
10123 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10124 OUT_BCS_BATCH(batch, 0);
10125 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10127 ADVANCE_BCS_BATCH(batch);
      /* B slice with explicit bipred (weighted_bipred_idc == 1):
       * emit an L0 packet followed by an L1 packet. */
10130 if (slice_type == SLICE_TYPE_B &&
10131 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10132 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10133 for (i = 0; i < 32; i++) {
10134 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10135 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10136 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10137 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10138 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10139 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10142 BEGIN_BCS_BATCH(batch, 98);
10143 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10144 OUT_BCS_BATCH(batch, 0);
10145 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10146 ADVANCE_BCS_BATCH(batch);
      /* Second packet: list L1 (dword1 = 1). */
10148 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10149 for (i = 0; i < 32; i++) {
10150 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10151 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10152 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10153 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10154 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10155 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10158 BEGIN_BCS_BATCH(batch, 98);
10159 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10160 OUT_BCS_BATCH(batch, 1);
10161 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10162 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one slice into the PAK command stream.
 * On the first PAK pass the per-slice commands (ref idx state,
 * weight/offset state, slice state, packed headers) are recorded once
 * into a second-level batch buffer and their offset cached in
 * avc_state->slice_batch_offset[]; later passes replay the cached
 * offset. The main batch then chains to that second-level buffer and
 * to the MB code buffer produced by the VME stage.
 */
10167 gen9_mfc_avc_single_slice(VADriverContextP ctx,
10168 struct encode_state *encode_state,
10169 struct intel_encoder_context *encoder_context,
10170 VAEncSliceParameterBufferH264 *slice_param,
10171 VAEncSliceParameterBufferH264 *next_slice_param,
10174 struct i965_driver_data *i965 = i965_driver_data(ctx);
10175 struct i965_gpe_table *gpe = &i965->gpe_table;
10176 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10177 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10178 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10179 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10180 struct intel_batchbuffer *batch = encoder_context->base.batch;
10181 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
10182 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
10183 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10184 struct object_surface *obj_surface;
10185 struct gen9_surface_avc *avc_priv_surface;
10187 unsigned int slice_offset = 0;
      /* First pass: record this slice's commands and remember where they start. */
10189 if (generic_state->curr_pak_pass == 0) {
10190 slice_offset = intel_batchbuffer_used_size(slice_batch);
10191 avc_state->slice_batch_offset[slice_index] = slice_offset;
10192 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
10193 gen9_mfc_avc_weightoffset_state(ctx,
10199 gen9_mfc_avc_slice_state(ctx,
10206 gen9_mfc_avc_inset_headers(ctx,
      /* Terminate the recorded slice segment so the chain returns cleanly. */
10213 BEGIN_BCS_BATCH(slice_batch, 2);
10214 OUT_BCS_BATCH(slice_batch, 0);
10215 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
10216 ADVANCE_BCS_BATCH(slice_batch);
      /* Subsequent passes: reuse the offset cached on pass 0. */
10219 slice_offset = avc_state->slice_batch_offset[slice_index];
10221 /* insert slice as second level.*/
10222 memset(&second_level_batch, 0, sizeof(second_level_batch));
10223 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10224 second_level_batch.offset = slice_offset;
10225 second_level_batch.bo = slice_batch->buffer;
10226 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10228 /* insert mb code as second level.*/
10229 obj_surface = encode_state->reconstructed_object;
10230 assert(obj_surface->private_data);
10231 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10233 memset(&second_level_batch, 0, sizeof(second_level_batch));
10234 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
      /* Offset into the MB code buffer: 16 dwords (64 bytes) per macroblock,
       * starting at this slice's first MB. */
10235 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
10236 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
10237 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * Slice-level PAK driver: walk every slice parameter buffer (and every
 * element within), emitting each slice via gen9_mfc_avc_single_slice(),
 * then flush the video pipeline. next_slice_param lets the slice coder
 * know whether it is emitting the last slice of the frame.
 */
10242 gen9_avc_pak_slice_level(VADriverContextP ctx,
10243 struct encode_state *encode_state,
10244 struct intel_encoder_context *encoder_context)
10246 struct i965_driver_data *i965 = i965_driver_data(ctx);
10247 struct i965_gpe_table *gpe = &i965->gpe_table;
10248 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10249 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10250 struct intel_batchbuffer *batch = encoder_context->base.batch;
10251 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10252 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10254 int slice_index = 0;
10255 int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
10256 int has_tail = 0; /* check it later */
      /* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
10258 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10259 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
      /* Peek at the first slice of the following buffer (NULL on last). */
10261 if (j == encode_state->num_slice_params_ext - 1)
10262 next_slice_group_param = NULL;
10264 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
10266 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10267 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10268 next_slice_param = slice_param + 1;
10270 next_slice_param = next_slice_group_param;
10272 gen9_mfc_avc_single_slice(ctx,
10281 if (is_frame_level)
10285 if (is_frame_level)
10290 /* insert a tail if required */
      /* Flush and invalidate the video pipeline cache after all slices. */
10293 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10294 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10295 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Picture-level PAK programming for one pass: pipe mode select, surface
 * states, buffer address states, image state (from the BRC-updated
 * buffer when BRC is on, otherwise freshly generated), and QM/FQM/
 * direct-mode states. On BRC re-encode passes, a conditional
 * batch-buffer-end first checks the image status mask recorded by the
 * previous pass so further passes can be skipped when not needed.
 */
10298 gen9_avc_pak_picture_level(VADriverContextP ctx,
10299 struct encode_state *encode_state,
10300 struct intel_encoder_context *encoder_context)
10302 struct i965_driver_data *i965 = i965_driver_data(ctx);
10303 struct i965_gpe_table *gpe = &i965->gpe_table;
10304 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10305 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10306 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10307 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10308 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10309 struct intel_batchbuffer *batch = encoder_context->base.batch;
      /* BRC re-encode pass: end the batch early if the status mask test passes. */
10311 if (generic_state->brc_enabled &&
10312 generic_state->curr_pak_pass) {
10313 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
10314 struct encoder_status_buffer_internal *status_buffer;
10315 status_buffer = &(avc_ctx->status_buffer);
10317 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
10318 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
10319 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
10320 mi_conditional_batch_buffer_end_params.compare_data = 0;
10321 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
10322 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
      /* Common MFX picture-level state. */
10325 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
10326 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
10327 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
10328 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
10329 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
10330 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
      /* BRC: chain to the per-pass image state prepared by the BRC kernel. */
10332 if (generic_state->brc_enabled) {
10333 memset(&second_level_batch, 0, sizeof(second_level_batch));
10334 if (generic_state->curr_pak_pass == 0) {
10335 second_level_batch.offset = 0;
10337 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
10339 second_level_batch.is_second_level = 1;
10340 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
10341 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10343 /*generate a new image state */
10344 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
10345 memset(&second_level_batch, 0, sizeof(second_level_batch));
10346 second_level_batch.offset = 0;
10347 second_level_batch.is_second_level = 1;
10348 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
10349 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
      /* Quantization matrices and direct-mode state. */
10352 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
10353 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
10354 gen9_mfc_avc_directmode_state(ctx, encoder_context);
/*
 * After PAK completes, copy MFX status MMIO registers into the status
 * buffer (for the app-visible coded-buffer status) and into the BRC
 * pre-PAK statistics buffer (bitstream byte counts, pass count, image
 * status control) so the next BRC pass can read them. Bracketed by
 * MI_FLUSH_DW to order the register reads against PAK completion.
 */
10359 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10361 struct i965_driver_data *i965 = i965_driver_data(ctx);
10362 struct i965_gpe_table *gpe = &i965->gpe_table;
10363 struct intel_batchbuffer *batch = encoder_context->base.batch;
10364 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10365 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10366 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10368 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
10369 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
10370 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
10371 struct encoder_status_buffer_internal *status_buffer;
10373 status_buffer = &(avc_ctx->status_buffer);
10375 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10376 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10378 /* read register and store into status_buffer and pak_statistic info */
10379 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
10380 mi_store_reg_mem_param.bo = status_buffer->bo;
10381 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
10382 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10383 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
      /* Save the image status mask for the conditional batch-buffer-end
       * test on the next PAK pass. */
10385 mi_store_reg_mem_param.bo = status_buffer->bo;
10386 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
10387 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
10388 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10390 /*update the status in the pak_statistic_surface */
      /* dword 0: total bitstream byte count for the frame. */
10391 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10392 mi_store_reg_mem_param.offset = 0;
10393 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10394 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
      /* dword 1: byte count excluding headers. */
10396 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10397 mi_store_reg_mem_param.offset = 4;
10398 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
10399 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
      /* dword 2: number of PAK passes executed so far (immediate store). */
10401 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
10402 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10403 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
10404 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
10405 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
      /* dwords 4..: per-pass image status control register snapshot. */
10407 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10408 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
10409 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10410 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10412 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10413 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Map the external rate-control mode requested on the encoder context
 * to the encoder's internal rate mode (CBR / VBR / CQP), and fall back
 * to the default quality level when none was set.
 * NOTE(review): the switch's case labels are missing from this chunk
 * (original lines dropped); only the assignment arms are visible.
 */
10419 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10420 struct intel_encoder_context *encoder_context)
10422 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10423 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10424 unsigned int rate_control_mode = encoder_context->rate_control_mode;
      /* Low 7 bits select the base rate-control mode. */
10426 switch (rate_control_mode & 0x7f) {
10428 generic_state->internal_rate_mode = VA_RC_CBR;
10432 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10437 generic_state->internal_rate_mode = VA_RC_CQP;
      /* Quality level 0 means "unset": apply the AVC default. */
10441 if (encoder_context->quality_level == 0)
10442 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10445 /* allocate resources for PAK only (FEI mode) */
/*
 * Prepare all resources for a PAK-only (FEI) frame: validate the VA
 * parameters, allocate PAK resources, wrap the app-provided coded
 * buffer and status buffer, build the L0/L1 frame-store-id maps from
 * the reference lists, check the reconstructed surface, and bind the
 * externally supplied MB code / MV data buffers from the FEI frame
 * control parameters.
 */
10447 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10449 struct encode_state *encode_state,
10450 struct intel_encoder_context *encoder_context)
10452 VAStatus va_status;
10453 struct i965_driver_data *i965 = i965_driver_data(ctx);
10454 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10455 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10456 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10457 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10458 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10459 struct gen9_surface_avc *avc_priv_surface;
10460 VAEncPictureParameterBufferH264 *pic_param;
10461 VAEncSliceParameterBufferH264 *slice_param;
10462 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10463 unsigned int size = 0, i, j;
10464 unsigned int frame_mb_nums;
10465 struct object_buffer *obj_buffer = NULL;
10466 struct buffer_store *buffer_store = NULL;
10467 struct object_surface *obj_surface = NULL;
10468 struct avc_surface_param surface_param;
10469 struct i965_coded_buffer_segment *coded_buffer_segment;
10471 unsigned char * pdata;
10473 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10475 pic_param = avc_state->pic_param;
10476 slice_param = avc_state->slice_param[0];
10478 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10479 if (va_status != VA_STATUS_SUCCESS)
10482 va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10483 if (va_status != VA_STATUS_SUCCESS)
10486 /* Wrap the application's coded buffer as the compressed bitstream target. */
10487 obj_buffer = encode_state->coded_buf_object;
10488 bo = obj_buffer->buffer_store->bo;
10489 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10490 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10491 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10492 generic_ctx->compressed_bitstream.end_offset =
10493 ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
      /* The status buffer shares the coded buffer's BO. */
10496 dri_bo_unreference(avc_ctx->status_buffer.bo);
10497 avc_ctx->status_buffer.bo = bo;
10498 dri_bo_reference(bo);
10500 /* set the internal flag to 0 to indicate the coded size is unknown */
10502 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10503 coded_buffer_segment->mapped = 0;
10504 coded_buffer_segment->codec = encoder_context->codec;
10505 coded_buffer_segment->status_support = 1;
10507 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10508 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10510 //frame id, it is the ref pic id in the reference_objects list.
      /* Determine active reference counts; slice-level override wins
       * over the picture-level counts when the flag is set. */
10511 avc_state->num_refs[0] = 0;
10512 avc_state->num_refs[1] = 0;
10513 if (generic_state->frame_type == SLICE_TYPE_P) {
10514 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10516 if (slice_param->num_ref_idx_active_override_flag)
10517 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10518 } else if (generic_state->frame_type == SLICE_TYPE_B) {
10519 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10520 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10522 if (slice_param->num_ref_idx_active_override_flag) {
10523 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10524 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
      /* Map each L0 entry to its index in encode_state->reference_objects. */
10527 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10528 VAPictureH264 *va_pic;
10530 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10531 avc_state->list_ref_idx[0][i] = 0;
10533 if (i >= avc_state->num_refs[0])
10536 va_pic = &slice_param->RefPicList0[i];
10538 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10539 obj_surface = encode_state->reference_objects[j];
10543 obj_surface->base.id == va_pic->picture_id) {
10545 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10546 avc_state->list_ref_idx[0][i] = j;
      /* Same mapping for list L1. */
10552 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10553 VAPictureH264 *va_pic;
10555 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10556 avc_state->list_ref_idx[1][i] = 0;
10558 if (i >= avc_state->num_refs[1])
10561 va_pic = &slice_param->RefPicList1[i];
10563 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10564 obj_surface = encode_state->reference_objects[j];
10569 obj_surface->base.id == va_pic->picture_id) {
10571 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10572 avc_state->list_ref_idx[1][i] = j;
10580 obj_surface = encode_state->reconstructed_object;
10581 fei_param = avc_state->fei_framectl_param;
10582 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10584 /* Setup current reconstruct frame */
10585 obj_surface = encode_state->reconstructed_object;
10586 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10588 if (va_status != VA_STATUS_SUCCESS)
10591 memset(&surface_param, 0, sizeof(surface_param));
10592 surface_param.frame_width = generic_state->frame_width_in_pixel;
10593 surface_param.frame_height = generic_state->frame_height_in_pixel;
10594 va_status = gen9_avc_init_check_surfaces(ctx,
10595 obj_surface, encoder_context,
10597 avc_priv_surface = obj_surface->private_data;
10599 /* res_mb_code_surface for MB code */
10600 /* PAK only mode must have the mb_code_surface from middleware,
10601 * so the code shouldn't reach here without an externally provided
10602 * MB Code buffer */
10603 assert(fei_param->mb_code_data != VA_INVALID_ID);
10604 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10605 obj_buffer = BUFFER(fei_param->mb_code_data);
10606 assert(obj_buffer != NULL);
10607 buffer_store = obj_buffer->buffer_store;
10608 assert(size <= buffer_store->bo->size);
10609 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10610 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10611 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10613 /* res_mv_data_surface for MV data */
      /* MV data is optional in FEI mode; bind it only when provided. */
10614 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10615 if (fei_param->mv_data != VA_INVALID_ID) {
10616 obj_buffer = BUFFER(fei_param->mv_data);
10617 assert(obj_buffer != NULL);
10618 buffer_store = obj_buffer->buffer_store;
10619 assert(size <= buffer_store->bo->size);
10620 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10621 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10622 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10626 return VA_STATUS_SUCCESS;
/*
 * Prepare all surfaces and buffers needed by the PAK stage for one
 * frame: delegate to the FEI path when in PAK-only FEI mode, detect
 * whether in-loop deblocking is enabled from the slice parameters,
 * wrap reconstructed / input / reference surfaces as GPE resources,
 * (re)create the per-slice second-level batch buffer, and allocate
 * the row-store and MB-status scratch buffers.
 */
10631 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
10633 struct encode_state *encode_state,
10634 struct intel_encoder_context *encoder_context)
10636 VAStatus va_status;
10637 struct i965_driver_data *i965 = i965_driver_data(ctx);
10638 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10639 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10640 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10641 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10642 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10644 struct object_surface *obj_surface;
10645 VAEncPictureParameterBufferH264 *pic_param;
10646 VAEncSliceParameterBufferH264 *slice_param;
10648 struct gen9_surface_avc *avc_priv_surface;
10649 struct avc_surface_param surface_param;
10650 int i, j, enable_avc_ildb = 0;
10651 unsigned int allocate_flag = 1;
10652 unsigned int size, w_mb, h_mb;
      /* FEI PAK-only mode has its own preparation path. */
10654 if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
10655 va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10656 if (va_status != VA_STATUS_SUCCESS)
10660 pic_param = avc_state->pic_param;
10661 slice_param = avc_state->slice_param[0];
10662 w_mb = generic_state->frame_width_in_mbs;
10663 h_mb = generic_state->frame_height_in_mbs;
10665 /* update the parameter and check slice parameter */
      /* Deblocking is enabled for the frame if any slice doesn't disable it
       * (disable_deblocking_filter_idc != 1); stop scanning once found. */
10666 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
10667 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
10668 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10670 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10671 assert((slice_param->slice_type == SLICE_TYPE_I) ||
10672 (slice_param->slice_type == SLICE_TYPE_SI) ||
10673 (slice_param->slice_type == SLICE_TYPE_P) ||
10674 (slice_param->slice_type == SLICE_TYPE_SP) ||
10675 (slice_param->slice_type == SLICE_TYPE_B));
10677 if (slice_param->disable_deblocking_filter_idc != 1) {
10678 enable_avc_ildb = 1;
10685 avc_state->enable_avc_ildb = enable_avc_ildb;
10687 /* setup the all surface and buffer for PAK */
10688 /* Setup current reconstruct frame */
10689 obj_surface = encode_state->reconstructed_object;
10690 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10692 if (va_status != VA_STATUS_SUCCESS)
10695 memset(&surface_param, 0, sizeof(surface_param));
10696 surface_param.frame_width = generic_state->frame_width_in_pixel;
10697 surface_param.frame_height = generic_state->frame_height_in_pixel;
10698 va_status = gen9_avc_init_check_surfaces(ctx,
10699 obj_surface, encoder_context,
10701 if (va_status != VA_STATUS_SUCCESS)
10703 /* initialize avc_priv_surface members: frame_store_id, qp_value, etc.;
      * the last two direct-MV buffer slots belong to the current frame. */
10705 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10706 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
10707 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
10708 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
10709 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
10710 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
10711 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
10712 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
10713 avc_priv_surface->frame_store_id = 0;
10714 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
10715 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
10716 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
10717 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
10718 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
      /* The reconstructed surface also feeds the pre/post deblocking output
       * resource, depending on whether deblocking is enabled. */
10720 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10721 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10722 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10723 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
10726 if (avc_state->enable_avc_ildb) {
10727 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10729 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10731 /* input YUV surface */
10732 obj_surface = encode_state->input_yuv_object;
10733 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10735 if (va_status != VA_STATUS_SUCCESS)
10737 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10738 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
10740 /* Reference surfaces */
      /* Rebind each reference surface and its pair of direct-MV buffers
       * (top/bottom field) and record the top-field POCs. */
10741 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
10742 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10743 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
10744 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
10745 obj_surface = encode_state->reference_objects[i];
10746 avc_state->top_field_poc[2 * i] = 0;
10747 avc_state->top_field_poc[2 * i + 1] = 0;
10749 if (obj_surface && obj_surface->bo) {
10750 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
10752 /* actually it should be handled when it is reconstructed surface */
10753 va_status = gen9_avc_init_check_surfaces(ctx,
10754 obj_surface, encoder_context,
10756 if (va_status != VA_STATUS_SUCCESS)
10758 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10759 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
10760 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
10761 avc_priv_surface->frame_store_id = i;
10762 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
10763 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
      /* Recreate the second-level batch buffer that will record per-slice
       * commands; size scales with the number of slice parameter buffers. */
10769 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10770 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10771 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10774 avc_ctx->pres_slice_batch_buffer_2nd_level =
10775 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
10777 encode_state->num_slice_params_ext);
10778 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
10779 return VA_STATUS_ERROR_ALLOCATION_FAILED;
10781 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
10782 avc_state->slice_batch_offset[i] = 0;
      /* Row-store and status scratch buffers, sized per MB row / frame. */
10787 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10788 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10789 &avc_ctx->res_intra_row_store_scratch_buffer,
10791 "PAK Intra row store scratch buffer");
10792 if (!allocate_flag)
10793 goto failed_allocation;
10795 size = w_mb * 4 * 64;
10796 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10797 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10798 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
10800 "PAK Deblocking filter row store scratch buffer");
10801 if (!allocate_flag)
10802 goto failed_allocation;
10804 size = w_mb * 2 * 64;
10805 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10806 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10807 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
10809 "PAK BSD/MPC row store scratch buffer");
10810 if (!allocate_flag)
10811 goto failed_allocation;
10813 size = w_mb * h_mb * 16;
10814 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10815 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10816 &avc_ctx->res_pak_mb_status_buffer,
10818 "PAK MB status buffer");
10819 if (!allocate_flag)
10820 goto failed_allocation;
10822 return VA_STATUS_SUCCESS;
10825 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Top-level PAK (bitstream packing) stage for one AVC frame.
 *
 * Prepares the PAK resources, then runs one MFX PAK pass per configured
 * BRC pass on the BSD (video) ring, reading the MFC status registers
 * back after each pass so BRC can evaluate the produced bitstream size.
 *
 * Returns VA_STATUS_SUCCESS on success; on prepare failure the prepare
 * status is propagated (early-return path not visible in this excerpt).
 *
 * NOTE(review): this chunk is a sampled excerpt of the file — some lines
 * (e.g. the early return after prepare, the for-loop/if closing braces)
 * are elided here; the code below is kept byte-identical.
 */
10829 gen9_avc_encode_picture(VADriverContextP ctx,
10831                         struct encode_state *encode_state,
10832                         struct intel_encoder_context *encoder_context)
10834     VAStatus va_status;
10835     struct i965_driver_data *i965 = i965_driver_data(ctx);
10836     struct i965_gpe_table *gpe = &i965->gpe_table;
10837     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10838     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
10839     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
10840     struct intel_batchbuffer *batch = encoder_context->base.batch;
10842     va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10844     if (va_status != VA_STATUS_SUCCESS)
          /* Pin PAK work to BSD ring 0 on dual-BSD hardware so status
           * register reads match the ring the PAK commands ran on. */
10847     if (i965->intel.has_bsd2)
10848         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
10850         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
10851     intel_batchbuffer_emit_mi_flush(batch);
          /* One PAK pass per BRC pass; multiple passes let BRC re-encode
           * with adjusted QP when the frame overshoots its budget. */
10852     for (generic_state->curr_pak_pass = 0;
10853          generic_state->curr_pak_pass < generic_state->num_pak_passes;
10854          generic_state->curr_pak_pass++) {
10856         if (generic_state->curr_pak_pass == 0) {
10857             /* Initialize the AVC image ctrl reg for the first pass: write 0 to the status/control register. Is it needed in AVC? */
10858             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
10859             struct encoder_status_buffer_internal *status_buffer;
10861             status_buffer = &(avc_ctx->status_buffer);
10862             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
10863             mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10864             mi_load_reg_imm.data = 0;
10865             gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
10867         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
10868         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
10869         gen9_avc_read_mfc_status(ctx, encoder_context);
          /* The second-level slice batch buffer is rebuilt every frame;
           * release it now that all PAK passes have consumed it. */
10872     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10873         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10874         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10877     intel_batchbuffer_end_atomic(batch);
10878     intel_batchbuffer_flush(batch);
          /* Per-frame bookkeeping for the next encode call. */
10880     generic_state->seq_frame_number++;
10881     generic_state->total_frame_number++;
10882     generic_state->first_frame = 0;
10883     return VA_STATUS_SUCCESS;
/*
 * PAK pipeline entry point (installed as encoder_context->mfc_pipeline).
 * Dispatches on the VA profile: all supported H.264 profiles route to
 * gen9_avc_encode_picture(); anything else yields
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 *
 * NOTE(review): sampled excerpt — the switch header, default label and
 * final return are elided here; code kept byte-identical.
 */
10887 gen9_avc_pak_pipeline(VADriverContextP ctx,
10889                       struct encode_state *encode_state,
10890                       struct intel_encoder_context *encoder_context)
10895     case VAProfileH264ConstrainedBaseline:
10896     case VAProfileH264Main:
10897     case VAProfileH264High:
10898     case VAProfileH264MultiviewHigh:
10899     case VAProfileH264StereoHigh:
10900         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10904         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * Tear down the PAK side of the shared VME/PAK context: releases every
 * GPE resource allocated for PAK (surfaces, row-store scratch buffers,
 * MB status buffer, reference list and direct-MV buffers) and frees the
 * second-level slice batch buffer if one is still live.
 *
 * `context` is the encoder_vme_mfc_context shared with the VME side;
 * the structure allocations themselves are presumably freed by the VME
 * context destroy path (not visible in this excerpt — verify).
 *
 * NOTE(review): sampled excerpt — loop closing braces and any trailing
 * statements are elided here; code kept byte-identical.
 */
10912 gen9_avc_pak_context_destroy(void * context)
10914     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10915     struct generic_encoder_context * generic_ctx;
10916     struct i965_avc_encoder_context * avc_ctx;
10922     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10923     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
      /* Input/output surfaces. */
10926     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10927     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10928     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10929     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
      /* Bitstream output and PAK scratch/status buffers. */
10931     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10932     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10933     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10934     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10935     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
      /* Reference frame surfaces and their direct-MV buffers. */
10937     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10938         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10941     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10942         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
      /* Drop the per-frame second-level slice batch buffer if present. */
10945     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10946         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10947         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * Report the coded size of a finished frame (installed as
 * encoder_context->get_status).
 *
 * Reads the per-buffer encoder_status block stashed in the coded buffer
 * segment's private data (filled by the PAK status reads) and publishes
 * its bs_byte_count_frame as the segment size returned to the app.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER if either argument is NULL,
 * VA_STATUS_SUCCESS otherwise.
 */
10953 gen9_avc_get_coded_status(VADriverContextP ctx,
10954                           struct intel_encoder_context *encoder_context,
10955                           struct i965_coded_buffer_segment *coded_buf_seg)
10957     struct encoder_status *avc_encode_status;
10959     if (!encoder_context || !coded_buf_seg)
10960         return VA_STATUS_ERROR_INVALID_BUFFER;
10962     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10963     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10965     return VA_STATUS_SUCCESS;
/*
 * Allocate and initialize the shared VME/PAK encoder context.
 *
 * Builds the five context/state structures, selects the per-platform
 * MBEnc/BRC kernel binary, fills in the default generic and AVC encode
 * state, lays out the per-coded-buffer status block offsets and MMIO
 * register offsets used for status readback, initializes the kernels,
 * and installs the VME pipeline callbacks on encoder_context.
 *
 * On allocation failure (or an unsupported platform) it jumps to
 * allocate_structure_failed, which frees the partial allocations and
 * presumably returns an error (tail not fully visible in this excerpt).
 *
 * NOTE(review): sampled excerpt — various `else`/closing-brace lines and
 * part of the error path are elided; code kept byte-identical.
 */
10969 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10971     /* VME & PAK share the same context */
10972     struct i965_driver_data *i965 = i965_driver_data(ctx);
10973     struct encoder_vme_mfc_context * vme_context = NULL;
10974     struct generic_encoder_context * generic_ctx = NULL;
10975     struct i965_avc_encoder_context * avc_ctx = NULL;
10976     struct generic_enc_codec_state * generic_state = NULL;
10977     struct avc_enc_state * avc_state = NULL;
10978     struct encoder_status_buffer_internal *status_buffer;
      /* Status words live after the codec_private_data marker inside the
       * coded buffer segment; all offsets below are relative to it. */
10979     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10981     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10982     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10983     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10984     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10985     avc_state = calloc(1, sizeof(struct avc_enc_state));
10987     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10988         goto allocate_structure_failed;
      /* Redundant with calloc, but harmless; kept as-is. */
10990     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10991     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10992     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10993     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10994     memset(avc_state, 0, sizeof(struct avc_enc_state));
10996     encoder_context->vme_context = vme_context;
10997     vme_context->generic_enc_ctx = generic_ctx;
10998     vme_context->private_enc_ctx = avc_ctx;
10999     vme_context->generic_enc_state = generic_state;
11000     vme_context->private_enc_state = avc_state;
      /* Pick the kernel binary for the detected platform generation. */
11002     if (IS_SKL(i965->intel.device_info) ||
11003         IS_BXT(i965->intel.device_info)) {
11004         if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11005             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11006             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11008             /* FEI and PreEnc operation kernels are included in
11009              * the monolithic kernel binary */
11010             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11011             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11013     } else if (IS_GEN8(i965->intel.device_info)) {
11014         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11015         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11016     } else if (IS_KBL(i965->intel.device_info) ||
11017                IS_GLK(i965->intel.device_info)) {
11018         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11019         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11020     } else if (IS_GEN10(i965->intel.device_info)) {
11021         generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11022         generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
      /* Unsupported platform: reuse the allocation-failure cleanup. */
11024         goto allocate_structure_failed;
11026     /* initialize misc ? */
11027     avc_ctx->ctx = ctx;
11028     generic_ctx->use_hw_scoreboard = 1;
11029     generic_ctx->use_hw_non_stalling_scoreboard = 1;
11031     /* initialize generic state */
11033     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11034     generic_state->preset = INTEL_PRESET_RT_SPEED;
11035     generic_state->seq_frame_number = 0;
11036     generic_state->total_frame_number = 0;
11037     generic_state->frame_type = 0;
11038     generic_state->first_frame = 1;
      /* Frame geometry is unknown until the sequence parameters arrive. */
11040     generic_state->frame_width_in_pixel = 0;
11041     generic_state->frame_height_in_pixel = 0;
11042     generic_state->frame_width_in_mbs = 0;
11043     generic_state->frame_height_in_mbs = 0;
11044     generic_state->frame_width_4x = 0;
11045     generic_state->frame_height_4x = 0;
11046     generic_state->frame_width_16x = 0;
11047     generic_state->frame_height_16x = 0;
11048     generic_state->frame_width_32x = 0;
11049     generic_state->downscaled_width_4x_in_mb = 0;
11050     generic_state->downscaled_height_4x_in_mb = 0;
11051     generic_state->downscaled_width_16x_in_mb = 0;
11052     generic_state->downscaled_height_16x_in_mb = 0;
11053     generic_state->downscaled_width_32x_in_mb = 0;
11054     generic_state->downscaled_height_32x_in_mb = 0;
      /* HME (hierarchical motion estimation) capability defaults;
       * "enabled" flags are decided later per frame. */
11056     generic_state->hme_supported = 1;
11057     generic_state->b16xme_supported = 1;
11058     generic_state->b32xme_supported = 0;
11059     generic_state->hme_enabled = 0;
11060     generic_state->b16xme_enabled = 0;
11061     generic_state->b32xme_enabled = 0;
11063     if (encoder_context->fei_enabled) {
11064         /* Disabling HME in FEI encode */
11065         generic_state->hme_supported = 0;
11066         generic_state->b16xme_supported = 0;
11067     } else if (encoder_context->preenc_enabled) {
11068         /* Disabling 16x16ME in PreEnc */
11069         generic_state->b16xme_supported = 0;
      /* BRC (bit-rate control) defaults; refined below when the rate
       * control mode is neither NONE nor CQP. */
11072     generic_state->brc_distortion_buffer_supported = 1;
11073     generic_state->brc_constant_buffer_supported = 0;
11075     generic_state->frame_rate = 30;
11076     generic_state->brc_allocated = 0;
11077     generic_state->brc_inited = 0;
11078     generic_state->brc_need_reset = 0;
11079     generic_state->is_low_delay = 0;
11080     generic_state->brc_enabled = 0;//default
11081     generic_state->internal_rate_mode = 0;
11082     generic_state->curr_pak_pass = 0;
11083     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11084     generic_state->is_first_pass = 1;
11085     generic_state->is_last_pass = 0;
11086     generic_state->mb_brc_enabled = 0; // enable mb brc
11087     generic_state->brc_roi_enable = 0;
11088     generic_state->brc_dirty_roi_enable = 0;
11089     generic_state->skip_frame_enbale = 0;
11091     generic_state->target_bit_rate = 0;
11092     generic_state->max_bit_rate = 0;
11093     generic_state->min_bit_rate = 0;
11094     generic_state->init_vbv_buffer_fullness_in_bit = 0;
11095     generic_state->vbv_buffer_size_in_bit = 0;
11096     generic_state->frames_per_100s = 0;
11097     generic_state->gop_size = 0;
11098     generic_state->gop_ref_distance = 0;
11099     generic_state->brc_target_size = 0;
11100     generic_state->brc_mode = 0;
11101     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11102     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11103     generic_state->brc_init_reset_buf_size_in_bits = 0;
11104     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11105     generic_state->frames_per_window_size = 0;//default
11106     generic_state->target_percentage = 0;
11108     generic_state->avbr_curracy = 0;
11109     generic_state->avbr_convergence = 0;
11111     generic_state->num_skip_frames = 0;
11112     generic_state->size_skip_frames = 0;
11114     generic_state->num_roi = 0;
11115     generic_state->max_delta_qp = 0;
11116     generic_state->min_delta_qp = 0;
      /* Any real rate-control mode (not NONE/CQP) turns BRC on. */
11118     if (encoder_context->rate_control_mode != VA_RC_NONE &&
11119         encoder_context->rate_control_mode != VA_RC_CQP) {
11120         generic_state->brc_enabled = 1;
11121         generic_state->brc_distortion_buffer_supported = 1;
11122         generic_state->brc_constant_buffer_supported = 1;
11123         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11125     /*avc state initialization */
11126     avc_state->mad_enable = 0;
11127     avc_state->mb_disable_skip_map_enable = 0;
11128     avc_state->sfd_enable = 1;//default
11129     avc_state->sfd_mb_enable = 1;//set it true
11130     avc_state->adaptive_search_window_enable = 1;//default
11131     avc_state->mb_qp_data_enable = 0;
11132     avc_state->intra_refresh_i_enable = 0;
11133     avc_state->min_max_qp_enable = 0;
11134     avc_state->skip_bias_adjustment_enable = 0;//default,same as skip_bias_adjustment_supporte? no
11137     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11138     avc_state->ftq_skip_threshold_lut_input_enable = 0;
11139     avc_state->ftq_override = 0;
11141     avc_state->direct_bias_adjustment_enable = 0;
11142     avc_state->global_motion_bias_adjustment_enable = 0;
11143     avc_state->disable_sub_mb_partion = 0;
11144     avc_state->arbitrary_num_mbs_in_slice = 0;
11145     avc_state->adaptive_transform_decision_enable = 0;//default
11146     avc_state->skip_check_disable = 0;
11147     avc_state->tq_enable = 0;
11148     avc_state->enable_avc_ildb = 0;
11149     avc_state->mbaff_flag = 0;
11150     avc_state->enable_force_skip = 1;//default
11151     avc_state->rc_panic_enable = 1;//default
11152     avc_state->suppress_recon_enable = 1;//default
11154     avc_state->ref_pic_select_list_supported = 1;
11155     avc_state->mb_brc_supported = 1;//?,default
11156     avc_state->multi_pre_enable = 1;//default
11157     avc_state->ftq_enable = 1;//default
11158     avc_state->caf_supported = 1; //default
11159     avc_state->caf_enable = 0;
11160     avc_state->caf_disable_hd = 1;//default
11161     avc_state->skip_bias_adjustment_supported = 1;//default
11163     avc_state->adaptive_intra_scaling_enable = 1;//default
11164     avc_state->old_mode_cost_enable = 0;//default
11165     avc_state->multi_ref_qp_enable = 1;//default
11166     avc_state->weighted_ref_l0_enable = 1;//default
11167     avc_state->weighted_ref_l1_enable = 1;//default
11168     avc_state->weighted_prediction_supported = 0;
11169     avc_state->brc_split_enable = 0;
11170     avc_state->slice_level_report_supported = 0;
11172     avc_state->fbr_bypass_enable = 1;//default
11173     avc_state->field_scaling_output_interleaved = 0;
11174     avc_state->mb_variance_output_enable = 0;
11175     avc_state->mb_pixel_average_output_enable = 0;
11176     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11177     avc_state->mbenc_curbe_set_in_brc_update = 0;
11178     avc_state->rounding_inter_enable = 1; //default
11179     avc_state->adaptive_rounding_inter_enable = 1;//default
11181     avc_state->mbenc_i_frame_dist_in_use = 0;
11182     avc_state->mb_status_supported = 1; //set in initialization for gen9
11183     avc_state->mb_status_enable = 0;
11184     avc_state->mb_vproc_stats_enable = 0;
11185     avc_state->flatness_check_enable = 0;
11186     avc_state->flatness_check_supported = 1;//default
11187     avc_state->block_based_skip_enable = 0;
11188     avc_state->use_widi_mbenc_kernel = 0;
11189     avc_state->kernel_trellis_enable = 0;
11190     avc_state->generic_reserved = 0;
11192     avc_state->rounding_value = 0;
11193     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11194     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11195     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11196     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11197     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11198     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11199     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11200     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11201     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11203     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11204     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11205     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11207     avc_state->intra_refresh_qp_threshold = 0;
11208     avc_state->trellis_flag = 0;
11209     avc_state->hme_mv_cost_scaling_factor = 0;
11210     avc_state->slice_height = 1;
11211     avc_state->slice_num = 1;
11212     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11213     avc_state->bi_weight = 0;
11215     avc_state->lambda_table_enable = 0;
      /* Per-generation BRC constant-surface geometry and feature gates. */
11217     if (IS_GEN8(i965->intel.device_info)) {
11218         avc_state->brc_const_data_surface_width = 64;
11219         avc_state->brc_const_data_surface_height = 44;
11220         avc_state->mb_status_supported = 0;
11221     } else if (IS_SKL(i965->intel.device_info) ||
11222                IS_BXT(i965->intel.device_info)) {
11223         avc_state->brc_const_data_surface_width = 64;
11224         avc_state->brc_const_data_surface_height = 44;
11225         avc_state->brc_split_enable = 1;
11226     } else if (IS_KBL(i965->intel.device_info) ||
11227                IS_GEN10(i965->intel.device_info) ||
11228                IS_GLK(i965->intel.device_info)) {
11229         avc_state->brc_const_data_surface_width = 64;
11230         avc_state->brc_const_data_surface_height = 53;
11232         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11233         avc_state->extended_mv_cost_range_enable = 0;
11234         avc_state->reserved_g95 = 0;
11235         avc_state->mbenc_brc_buffer_size = 128;
11236         avc_state->kernel_trellis_enable = 1;
11237         avc_state->lambda_table_enable = 1;
11238         avc_state->brc_split_enable = 1;
11240         if (IS_GEN10(i965->intel.device_info))
11241             avc_state->adaptive_transform_decision_enable = 1;// CNL
11244     avc_state->num_refs[0] = 0;
11245     avc_state->num_refs[1] = 0;
11246     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11247     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11248     avc_state->tq_rounding = 0;
11249     avc_state->zero_mv_threshold = 0;
11250     avc_state->slice_second_levle_batch_buffer_in_use = 0;
11254     /* the definition of status buffer offset for Encoder */
11256     status_buffer = &avc_ctx->status_buffer;
11257     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
      /* Byte offsets of each status word inside the coded buffer segment. */
11259     status_buffer->base_offset = base_offset;
11260     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11261     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11262     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11263     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11264     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11265     status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
      /* MMIO register offsets the MFC status values are copied from. */
11267     status_buffer->status_buffer_size = sizeof(struct encoder_status);
11268     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11269     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11270     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11271     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11272     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
11274     if (IS_GEN8(i965->intel.device_info)) {
11275         gen8_avc_kernel_init(ctx, encoder_context);
11277         gen9_avc_kernel_init(ctx, encoder_context);
11279     encoder_context->vme_context = vme_context;
11280     /* Handling PreEnc operations separately since it gives better
11281      * code readability, avoid possible vme operations mess-up */
11282     encoder_context->vme_pipeline =
11283         !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11284     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
      /* Error path: free whatever was allocated (remaining frees and the
       * return are elided in this excerpt). */
11288 allocate_structure_failed:
11293     free(generic_state);
11299 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11301 /* VME & PAK share the same context */
11302 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11307 encoder_context->mfc_context = pak_context;
11308 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11309 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11310 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11311 encoder_context->get_status = gen9_avc_get_coded_status;