2 * Copyright @ 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
26 * Sreerenj Balachandran <sreerenj.balachandran@intel.com>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
55 #define MAX_URB_SIZE 4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT 1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
58 #define GPE_RESOURCE_ALIGNMENT 4 /* log2 alignment: 4 means 16 (= 1 << 4) */
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
62 OUT_BCS_RELOC64(batch, \
64 I915_GEM_DOMAIN_INSTRUCTION, \
65 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
68 OUT_BCS_BATCH(batch, 0); \
69 OUT_BCS_BATCH(batch, 0); \
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
74 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
75 OUT_BCS_BATCH(batch, attr); \
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE 64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE 128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE 16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE 48
84 #define FEI_AVC_QP_BUFFER_SIZE 1
85 #define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
87 #define SCALE_CUR_PIC 1
88 #define SCALE_PAST_REF_PIC 2
89 #define SCALE_FUTURE_REF_PIC 3
/* Flat quantization matrix: every byte is 0x10 (16), i.e. the default
 * "flat" scaling list (64 entries packed as 16 dwords). */
91 static const uint32_t qm_flat[16] = {
92 0x10101010, 0x10101010, 0x10101010, 0x10101010,
93 0x10101010, 0x10101010, 0x10101010, 0x10101010,
94 0x10101010, 0x10101010, 0x10101010, 0x10101010,
95 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quantization matrix: every 16-bit entry is 0x1000
 * (64 entries packed as 32 dwords, two 16-bit values per dword). */
98 static const uint32_t fqm_flat[32] = {
99 0x10001000, 0x10001000, 0x10001000, 0x10001000,
100 0x10001000, 0x10001000, 0x10001000, 0x10001000,
101 0x10001000, 0x10001000, 0x10001000, 0x10001000,
102 0x10001000, 0x10001000, 0x10001000, 0x10001000,
103 0x10001000, 0x10001000, 0x10001000, 0x10001000,
104 0x10001000, 0x10001000, 0x10001000, 0x10001000,
105 0x10001000, 0x10001000, 0x10001000, 0x10001000,
106 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Maps a slice type index to a kernel selector value.
 * NOTE(review): presumably indexed by SLICE_TYPE_{I,P,B}-style enum and
 * yielding the per-slice-type kernel ordinal — confirm against callers. */
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
/* Refresh BRC-related fields of generic_state from the rate-control
 * parameters the caller supplied via encoder_context->brc (bitrate,
 * per-mode settings, framerate, HRD buffer sizes, ROI list).  Sets
 * generic_state->brc_need_reset when the effective target bitrate has
 * changed so BRC state gets re-initialized.
 * NOTE(review): parts of this function are elided in this view; the
 * comments below describe only the visible statements. */
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559 struct encode_state *encode_state,
560 struct intel_encoder_context *encoder_context)
562 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* bits_per_second -> kbps, rounded up. */
567 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
569 generic_state->brc_need_reset = encoder_context->brc.need_reset;
/* CBR: min = target = max; any change of target forces a BRC reset. */
571 if (generic_state->internal_rate_mode == VA_RC_CBR) {
572 generic_state->min_bit_rate = generic_state->max_bit_rate;
573 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
575 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576 generic_state->target_bit_rate = generic_state->max_bit_rate;
577 generic_state->brc_need_reset = 1;
/* VBR: target = max * target_percentage / 100;
 * min = max * (2 * target_percentage - 100) / 100. */
579 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
583 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585 generic_state->brc_need_reset = 1;
/* Framerate: taken from brc.framerate for any BRC mode, with a 30fps
 * fallback (visible in the else-branch statements below) otherwise. */
590 if (generic_state->internal_rate_mode != VA_RC_CQP) {
591 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
595 generic_state->frames_per_100s = 30 * 100;
596 generic_state->frame_rate = 30 ;
597 generic_state->frames_per_window_size = 30;
/* HRD buffer parameters only matter when BRC is active (non-CQP). */
601 if (generic_state->internal_rate_mode != VA_RC_CQP) {
602 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
/* ROI: at most 3 regions are kept. */
607 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608 if (generic_state->num_roi > 0) {
609 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
612 for (i = 0; i < generic_state->num_roi; i++) {
613 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
614 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
615 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
616 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
/* Convert the rectangle by /16 — presumably pixel coordinates to
 * macroblock units; confirm against the kernels that consume roi[]. */
619 generic_state->roi[i].left /= 16;
620 generic_state->roi[i].right /= 16;
621 generic_state->roi[i].top /= 16;
622 generic_state->roi[i].bottom /= 16;
/* Locate one encoder kernel inside the combined kernel binary blob.
 * The blob begins with a gen9_avc_encoder_kernel_header table whose
 * entries store each kernel's start offset in 64-byte units; the
 * requested (operation, krnstate_idx) pair selects the table entry, and
 * the kernel's size is derived from the next entry's start offset (or
 * from the total binary size for the last entry).  Results are written
 * into ret_kernel->bin / ret_kernel->size.
 * NOTE(review): some lines of this function are elided in this view. */
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
631 INTEL_GENERIC_ENC_OPERATION operation,
633 struct i965_kernel *ret_kernel)
635 typedef uint32_t BIN_PTR[4];
638 gen9_avc_encoder_kernel_header *pkh_table;
639 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
642 if (!pvbinary || !ret_kernel)
645 bin_start = (char *)pvbinary;
646 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
/* One past the last table entry; used as the sentinel below when
 * computing the size of the final kernel. */
647 pinvalid_entry = &(pkh_table->static_detection) + 1;
648 next_krnoffset = binary_size;
/* Pick the base table entry for the requested operation. */
650 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651 pcurr_header = &pkh_table->ply_dscale_ply;
652 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653 pcurr_header = &pkh_table->ply_2xdscale_ply;
654 } else if (operation == INTEL_GENERIC_ENC_ME) {
655 pcurr_header = &pkh_table->me_p;
656 } else if (operation == INTEL_GENERIC_ENC_BRC) {
657 pcurr_header = &pkh_table->frame_brc_init;
658 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659 pcurr_header = &pkh_table->mbenc_quality_I;
660 } else if (operation == INTEL_GENERIC_ENC_WP) {
661 pcurr_header = &pkh_table->wp;
662 } else if (operation == INTEL_GENERIC_ENC_SFD) {
663 pcurr_header = &pkh_table->static_detection;
/* Advance to the idx-th kernel variant of that operation. */
668 pcurr_header += krnstate_idx;
/* kernel_start_pointer is stored in 64-byte units, hence << 6. */
669 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
671 pnext_header = (pcurr_header + 1);
672 if (pnext_header < pinvalid_entry) {
673 next_krnoffset = pnext_header->kernel_start_pointer << 6;
675 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* FEI counterpart of intel_avc_get_kernel_header_and_size(): same
 * lookup scheme, but against the gen9_avc_fei_encoder_kernel_header
 * table (which ends at the 'wp' entry) and with the FEI-specific
 * operation set (scaling4x / ME / MBENC / PREPROC).
 * NOTE(review): some lines of this function are elided in this view. */
681 intel_avc_fei_get_kernel_header_and_size(
684 INTEL_GENERIC_ENC_OPERATION operation,
686 struct i965_kernel *ret_kernel)
688 typedef uint32_t BIN_PTR[4];
691 gen9_avc_fei_encoder_kernel_header *pkh_table;
692 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
695 if (!pvbinary || !ret_kernel)
698 bin_start = (char *)pvbinary;
699 pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
/* One past the last table entry — sentinel for the last kernel's size. */
700 pinvalid_entry = &(pkh_table->wp) + 1;
701 next_krnoffset = binary_size;
703 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704 pcurr_header = &pkh_table->ply_dscale_ply;
705 } else if (operation == INTEL_GENERIC_ENC_ME) {
706 pcurr_header = &pkh_table->me_p;
707 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708 pcurr_header = &pkh_table->mbenc_i;
709 } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710 pcurr_header = &pkh_table->preproc;
715 pcurr_header += krnstate_idx;
/* Offsets are stored in 64-byte units, hence << 6. */
716 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
718 pnext_header = (pcurr_header + 1);
719 if (pnext_header < pinvalid_entry) {
720 next_krnoffset = pnext_header->kernel_start_pointer << 6;
722 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* Destructor for the per-surface gen9_surface_avc private data that
 * gen9_avc_init_check_surfaces() installs via free_private_data:
 * destroys the downscaled (4x/16x/32x) helper surfaces, releases all
 * per-surface GPE buffers, and drops the direct-MV bo references.
 * NOTE(review): some lines (e.g. the *data unpacking and final free)
 * are elided in this view. */
728 gen9_free_surfaces_avc(void **data)
730 struct gen9_surface_avc *avc_surface;
737 if (avc_surface->scaled_4x_surface_obj) {
738 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740 avc_surface->scaled_4x_surface_obj = NULL;
743 if (avc_surface->scaled_16x_surface_obj) {
744 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746 avc_surface->scaled_16x_surface_obj = NULL;
749 if (avc_surface->scaled_32x_surface_obj) {
750 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752 avc_surface->scaled_32x_surface_obj = NULL;
755 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
759 /* FEI specific resources */
760 /* since the driver previously taken an extra reference to the drm_bo
761 * in case the buffers were supplied by middleware, there shouldn't
762 * be any memory handling issue */
763 i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764 i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765 i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766 i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
/* Direct-MV (top/bottom field) buffers; dri_bo_unreference(NULL) is
 * safe, pointers are cleared to avoid dangling references. */
768 dri_bo_unreference(avc_surface->dmv_top);
769 avc_surface->dmv_top = NULL;
770 dri_bo_unreference(avc_surface->dmv_bottom);
771 avc_surface->dmv_bottom = NULL;
/* Lazily allocate the per-surface AVC private data for obj_surface:
 * 4x and 16x downscaled NV12 surfaces (32x only when 32x ME is
 * supported/enabled), the MB-code and MV-data buffers (skipped for FEI,
 * where the middleware supplies them), the optional ref-pic-select
 * buffer, and the direct-MV top/bottom bos.  Returns immediately with
 * success if private_data is already attached.
 * Cleanup of partially-built state is delegated to
 * gen9_free_surfaces_avc() via obj_surface->free_private_data.
 * NOTE(review): several argument/size lines are elided in this view. */
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782 struct object_surface *obj_surface,
783 struct intel_encoder_context *encoder_context,
784 struct avc_surface_param *surface_param)
786 struct i965_driver_data *i965 = i965_driver_data(ctx);
787 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
791 struct gen9_surface_avc *avc_surface;
792 int downscaled_width_4x, downscaled_height_4x;
793 int downscaled_width_16x, downscaled_height_16x;
794 int downscaled_width_32x, downscaled_height_32x;
796 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799 int allocate_flag = 1;
802 if (!obj_surface || !obj_surface->bo)
803 return VA_STATUS_ERROR_INVALID_SURFACE;
/* Already initialized for this surface — nothing to do. */
805 if (obj_surface->private_data) {
806 return VA_STATUS_SUCCESS;
809 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
812 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Attach the destructor before any further allocation so partial
 * failures are still cleaned up when the surface is destroyed. */
814 avc_surface->ctx = ctx;
815 obj_surface->private_data = avc_surface;
816 obj_surface->free_private_data = gen9_free_surfaces_avc;
/* 4x downscaled NV12 surface for HME/scaling kernels. */
818 downscaled_width_4x = generic_state->frame_width_4x;
819 downscaled_height_4x = generic_state->frame_height_4x;
821 i965_CreateSurfaces(ctx,
823 downscaled_height_4x,
826 &avc_surface->scaled_4x_surface_id);
828 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
830 if (!avc_surface->scaled_4x_surface_obj) {
831 return VA_STATUS_ERROR_ALLOCATION_FAILED;
834 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 16x downscaled NV12 surface. */
837 downscaled_width_16x = generic_state->frame_width_16x;
838 downscaled_height_16x = generic_state->frame_height_16x;
839 i965_CreateSurfaces(ctx,
840 downscaled_width_16x,
841 downscaled_height_16x,
844 &avc_surface->scaled_16x_surface_id);
845 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
847 if (!avc_surface->scaled_16x_surface_obj) {
848 return VA_STATUS_ERROR_ALLOCATION_FAILED;
851 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 32x downscaled surface only when 32x ME can be used. */
854 if (generic_state->b32xme_supported ||
855 generic_state->b32xme_enabled) {
856 downscaled_width_32x = generic_state->frame_width_32x;
857 downscaled_height_32x = generic_state->frame_height_32x;
858 i965_CreateSurfaces(ctx,
859 downscaled_width_32x,
860 downscaled_height_32x,
863 &avc_surface->scaled_32x_surface_id);
864 avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
866 if (!avc_surface->scaled_32x_surface_obj) {
867 return VA_STATUS_ERROR_ALLOCATION_FAILED;
870 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
874 /*mb code and mv data for each frame*/
/* Non-FEI only: FEI middleware provides its own MB-code/MV buffers.
 * MB code: 64 bytes per MB; MV data: 128 bytes per MB. */
875 if (!encoder_context->fei_enabled) {
876 size = frame_mb_nums * 16 * 4;
877 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878 &avc_surface->res_mb_code_surface,
882 goto failed_allocation;
884 size = frame_mb_nums * 32 * 4;
885 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886 &avc_surface->res_mv_data_surface,
890 goto failed_allocation;
/* Optional ref-pic-select list: 8 bytes per MB row entry, 64-aligned. */
894 if (avc_state->ref_pic_select_list_supported) {
895 width = ALIGN(frame_width_in_mbs * 8, 64);
896 height = frame_height_in_mbs ;
897 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898 &avc_surface->res_ref_pic_select_surface,
901 "Ref pic select list buffer");
903 goto failed_allocation;
/* Direct-MV buffers for top/bottom fields. */
907 avc_surface->dmv_top =
908 dri_bo_alloc(i965->intel.bufmgr,
909 "direct mv top Buffer",
912 avc_surface->dmv_bottom =
913 dri_bo_alloc(i965->intel.bufmgr,
914 "direct mv bottom Buffer",
917 assert(avc_surface->dmv_top);
918 assert(avc_surface->dmv_bottom);
920 return VA_STATUS_SUCCESS;
923 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Fill the MBEnc slice-map surface: for every macroblock, record which
 * slice it belongs to, walking the slice parameter list.  Only needed
 * (and only executed) when slices contain an arbitrary number of MBs;
 * otherwise the function returns early.
 * NOTE(review): some lines (row handling, the per-MB store, return) are
 * elided in this view. */
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928 struct encode_state *encode_state,
929 struct intel_encoder_context *encoder_context)
931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
936 struct i965_gpe_resource *gpe_resource = NULL;
937 VAEncSliceParameterBufferH264 * slice_param = NULL;
938 unsigned int * data = NULL;
939 unsigned int * data_row = NULL;
/* Row pitch in dwords: (width_in_mbs + 1) entries of 4 bytes,
 * row length 64-byte aligned (matches the surface allocated in
 * gen9_avc_allocate_resources). */
941 unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
943 if (!avc_state->arbitrary_num_mbs_in_slice)
946 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947 assert(gpe_resource);
949 i965_zero_gpe_resource(gpe_resource);
951 data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
/* Walk slices and their macroblock counts, writing a slice index per MB. */
955 for (i = 0; i < avc_state->slice_num; i++) {
956 slice_param = avc_state->slice_param[i];
957 for (j = 0; j < slice_param->num_macroblocks; j++) {
959 if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
/* Terminator entry after the last mapped MB. */
967 *data++ = 0xFFFFFFFF;
969 i965_unmap_gpe_resource(gpe_resource);
/* Allocate (or re-allocate) every GPE surface/buffer the AVC VME
 * pipeline needs: the 2nd-level image-state batch, MB status and
 * flatness-check surfaces, the 4x/16x/32x HME MV/distortion buffers,
 * the one-time BRC buffer set (guarded by generic_state->brc_allocated),
 * the external MB-QP surface, the MBEnc slice map, the SFD output and
 * cost tables (pre-filled from gen9_avc_sfd_cost_table_{p,b}_frame),
 * the weighted-prediction output surfaces, and the MAD buffer.
 * Each resource is freed before re-allocation, so the function is safe
 * to call again (e.g. on reconfiguration).  Any allocation failure
 * jumps to failed_allocation and returns
 * VA_STATUS_ERROR_ALLOCATION_FAILED.
 * NOTE(review): many size/argument lines are elided in this view; the
 * comments below describe only the visible statements. */
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974 struct encode_state *encode_state,
975 struct intel_encoder_context *encoder_context)
977 struct i965_driver_data *i965 = i965_driver_data(ctx);
978 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982 unsigned int size = 0;
983 unsigned int width = 0;
984 unsigned int height = 0;
985 unsigned char * data = NULL;
986 int allocate_flag = 1;
989 /*all the surface/buffer are allocated here*/
991 /*second level batch buffer for image state write when cqp etc*/
992 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995 &avc_ctx->res_image_state_batch_buffer_2nd_level,
997 "second levle batch (image state write) buffer");
999 goto failed_allocation;
1001 /* scaling related surface */
1002 if (avc_state->mb_status_supported) {
1003 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
/* 64 bytes per MB, rounded up to a 1KB boundary. */
1004 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006 &avc_ctx->res_mb_status_buffer,
1007 ALIGN(size, 0x1000),
1008 "MB statistics output buffer");
1010 goto failed_allocation;
1011 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1014 if (avc_state->flatness_check_supported) {
1015 width = generic_state->frame_width_in_mbs * 4;
1016 height = generic_state->frame_height_in_mbs * 4;
1017 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019 &avc_ctx->res_flatness_check_surface,
1022 "Flatness check buffer");
1024 goto failed_allocation;
1026 /* me related surface */
1027 width = generic_state->downscaled_width_4x_in_mb * 8;
1028 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031 &avc_ctx->s4x_memv_distortion_buffer,
1034 "4x MEMV distortion buffer");
1036 goto failed_allocation;
1037 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1039 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043 &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1046 "4x MEMV min distortion brc buffer");
1048 goto failed_allocation;
1049 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1052 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056 &avc_ctx->s4x_memv_data_buffer,
1059 "4x MEMV data buffer");
1061 goto failed_allocation;
1062 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1065 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069 &avc_ctx->s16x_memv_data_buffer,
1072 "16x MEMV data buffer");
1074 goto failed_allocation;
1075 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1078 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082 &avc_ctx->s32x_memv_data_buffer,
1085 "32x MEMV data buffer");
1087 goto failed_allocation;
1088 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
/* BRC buffers are allocated once per context; skipped on later calls. */
1091 if (!generic_state->brc_allocated) {
1092 /*brc related surface */
1093 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1095 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096 &avc_ctx->res_brc_history_buffer,
1097 ALIGN(size, 0x1000),
1098 "brc history buffer");
1100 goto failed_allocation;
1102 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1104 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106 ALIGN(size, 0x1000),
1107 "brc pak statistic buffer");
1109 goto failed_allocation;
1111 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
/* Room for 7 image-state command sets (read and write mirrors). */
1112 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114 &avc_ctx->res_brc_image_state_read_buffer,
1115 ALIGN(size, 0x1000),
1116 "brc image state read buffer");
1118 goto failed_allocation;
1120 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123 &avc_ctx->res_brc_image_state_write_buffer,
1124 ALIGN(size, 0x1000),
1125 "brc image state write buffer");
1127 goto failed_allocation;
1129 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130 height = avc_state->brc_const_data_surface_height;
1131 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133 &avc_ctx->res_brc_const_data_buffer,
1136 "brc const data buffer");
1138 goto failed_allocation;
1139 i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1141 if (generic_state->brc_distortion_buffer_supported) {
/* NOTE(review): width/height are computed twice here; the second pair
 * overrides the first — the ALIGN-based values are dead stores. */
1142 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148 &avc_ctx->res_brc_dist_data_surface,
1151 "brc dist data buffer");
1153 goto failed_allocation;
1154 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1157 if (generic_state->brc_roi_enable) {
1158 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162 &avc_ctx->res_mbbrc_roi_surface,
1165 "mbbrc roi buffer");
1167 goto failed_allocation;
1168 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1172 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176 &avc_ctx->res_mbbrc_mb_qp_data_surface,
1179 "mbbrc mb qp buffer");
1181 goto failed_allocation;
1183 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
/* 16 dwords per QP value across the full QP range. */
1184 size = 16 * AVC_QP_MAX * 4;
1185 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186 &avc_ctx->res_mbbrc_const_data_buffer,
1187 ALIGN(size, 0x1000),
1188 "mbbrc const data buffer");
1190 goto failed_allocation;
1192 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194 size = avc_state->mbenc_brc_buffer_size;
1195 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196 &avc_ctx->res_mbenc_brc_buffer,
1197 ALIGN(size, 0x1000),
1198 "mbenc brc buffer");
1200 goto failed_allocation;
1201 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1203 generic_state->brc_allocated = 1;
/* External per-MB QP surface (application-supplied QP map). */
1207 if (avc_state->mb_qp_data_enable) {
1208 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212 &avc_ctx->res_mb_qp_data_surface,
1215 "external mb qp buffer");
1217 goto failed_allocation;
1220 /* mbenc related surface. it share most of surface with other kernels */
1221 if (avc_state->arbitrary_num_mbs_in_slice) {
/* Geometry must match the pitch computed in gen9_avc_generate_slice_map. */
1222 width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223 height = generic_state->frame_height_in_mbs ;
1224 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226 &avc_ctx->res_mbenc_slice_map_surface,
1229 "slice map buffer");
1231 goto failed_allocation;
1232 i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1234 /*generate slice map,default one slice per frame.*/
1237 /* sfd related surface */
1238 if (avc_state->sfd_enable) {
1239 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1241 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242 &avc_ctx->res_sfd_output_buffer,
1244 "sfd output buffer");
1246 goto failed_allocation;
1247 i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
/* SFD cost tables: 52-byte tables copied in from static const data. */
1249 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250 size = ALIGN(52, 64);
1251 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252 &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1254 "sfd P frame cost table buffer");
1256 goto failed_allocation;
1257 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1259 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1262 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263 size = ALIGN(52, 64);
1264 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265 &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1267 "sfd B frame cost table buffer");
1269 goto failed_allocation;
1270 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1272 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1276 /* wp related surfaces */
/* Two full-resolution NV12 output surfaces for weighted prediction,
 * also wrapped as 2D GPE resources for the kernels. */
1277 if (avc_state->weighted_prediction_supported) {
1278 for (i = 0; i < 2 ; i++) {
1279 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1283 width = generic_state->frame_width_in_pixel;
1284 height = generic_state->frame_height_in_pixel ;
1285 i965_CreateSurfaces(ctx,
1288 VA_RT_FORMAT_YUV420,
1290 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1293 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294 goto failed_allocation;
1297 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1300 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
/* MAD (mean absolute difference) output buffer. */
1308 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1310 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311 &avc_ctx->res_mad_data_buffer,
1312 ALIGN(size, 0x1000),
1315 goto failed_allocation;
1317 return VA_STATUS_SUCCESS;
1320 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release everything gen9_avc_allocate_resources() (and the PreEnc
 * path) created: all GPE buffers/surfaces, the weighted-prediction VA
 * surfaces, and the PreEnc downscaled reference surfaces.
 * i965_free_gpe_resource() is safe on never-allocated entries, so this
 * can run regardless of which optional features were enabled.
 * NOTE(review): a few lines are elided in this view. */
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1329 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330 VADriverContextP ctx = avc_ctx->ctx;
1333 /* free all the surface/buffer here*/
1334 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
/* Destroy the VA surfaces backing the WP output pair and clear the
 * handles so a later re-allocation starts clean. */
1361 for (i = 0; i < 2 ; i++) {
1362 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363 i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364 avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365 avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1369 /* free preenc resources */
1370 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1375 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1378 i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379 avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380 avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1382 i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383 avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384 avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1386 i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387 avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388 avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
/*
 * gen9_avc_run_kernel_media_object:
 * Submit one AVC encoder kernel launch via a MEDIA_OBJECT command.
 * Tags the driver-internal status buffer with the media function id
 * (so status readback can tell which kernel executed), then runs the
 * GPE pipeline setup / media object / media state flush / pipeline end
 * sequence and flushes the batch.
 * NOTE(review): this listing is an elided extraction — the embedded line
 * numbers jump (1394->1396, 1405->1410), so the return type, braces and
 * the `media_function` parameter declaration are not visible here.
 */
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393 struct intel_encoder_context *encoder_context,
1394 struct i965_gpe_context *gpe_context,
1396 struct gpe_media_object_parameter *param)
1398 struct i965_driver_data *i965 = i965_driver_data(ctx);
1399 struct i965_gpe_table *gpe = &i965->gpe_table;
1400 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1403 struct intel_batchbuffer *batch = encoder_context->base.batch;
1404 struct encoder_status_buffer_internal *status_buffer;
1405 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
/* Reserve 0x1000 bytes of batch space atomically and serialize prior work. */
1410 intel_batchbuffer_start_atomic(batch, 0x1000);
1411 intel_batchbuffer_emit_mi_flush(batch);
/* Store the media function id into the status buffer with
 * MI_STORE_DATA_IMM so later status parsing can identify this launch. */
1413 status_buffer = &(avc_ctx->status_buffer);
1414 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415 mi_store_data_imm.bo = status_buffer->bo;
1416 mi_store_data_imm.offset = status_buffer->media_index_offset;
1417 mi_store_data_imm.dw0 = media_function;
1418 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
/* Standard GPE kernel dispatch sequence via the per-gen gpe vtable. */
1420 gpe->pipeline_setup(ctx, gpe_context, batch);
1421 gpe->media_object(ctx, gpe_context, batch, param);
1422 gpe->media_state_flush(ctx, gpe_context, batch);
1424 gpe->pipeline_end(ctx, gpe_context, batch);
1426 intel_batchbuffer_end_atomic(batch);
1428 intel_batchbuffer_flush(batch);
/*
 * gen9_avc_run_kernel_media_object_walker:
 * Same dispatch sequence as gen9_avc_run_kernel_media_object, but the
 * kernel is launched with a MEDIA_OBJECT_WALKER (hardware thread walker)
 * instead of a single MEDIA_OBJECT, via gpe->media_object_walker().
 * NOTE(review): elided extraction — the embedded line numbers jump
 * (1434->1436, 1445->1450), so the return type, braces and the
 * `media_function` parameter declaration are not visible here.
 */
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433 struct intel_encoder_context *encoder_context,
1434 struct i965_gpe_context *gpe_context,
1436 struct gpe_media_object_walker_parameter *param)
1438 struct i965_driver_data *i965 = i965_driver_data(ctx);
1439 struct i965_gpe_table *gpe = &i965->gpe_table;
1440 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1443 struct intel_batchbuffer *batch = encoder_context->base.batch;
1444 struct encoder_status_buffer_internal *status_buffer;
1445 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
/* Reserve batch space atomically and serialize prior GPU work. */
1450 intel_batchbuffer_start_atomic(batch, 0x1000);
1452 intel_batchbuffer_emit_mi_flush(batch);
/* Tag the status buffer with the media function id for status readback. */
1454 status_buffer = &(avc_ctx->status_buffer);
1455 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456 mi_store_data_imm.bo = status_buffer->bo;
1457 mi_store_data_imm.offset = status_buffer->media_index_offset;
1458 mi_store_data_imm.dw0 = media_function;
1459 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
/* Walker-based dispatch: the hardware iterates thread origins per param. */
1461 gpe->pipeline_setup(ctx, gpe_context, batch);
1462 gpe->media_object_walker(ctx, gpe_context, batch, param);
1463 gpe->media_state_flush(ctx, gpe_context, batch);
1465 gpe->pipeline_end(ctx, gpe_context, batch);
1467 intel_batchbuffer_end_atomic(batch);
1469 intel_batchbuffer_flush(batch);
/*
 * gen9_init_gpe_context_avc:
 * Fill in the sizing fields of a GPE context (CURBE, sampler, interface
 * descriptor table, binding table and MEDIA_VFE_STATE parameters) from a
 * per-kernel parameter struct, before kernel allocation/dispatch.
 * NOTE(review): elided extraction — closing braces and blank lines are
 * not visible (embedded line numbers jump, e.g. 1486->1489).
 */
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474 struct i965_gpe_context *gpe_context,
1475 struct encoder_kernel_parameter *kernel_param)
1477 struct i965_driver_data *i965 = i965_driver_data(ctx);
1479 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
/* Samplers are optional; only sized when the kernel asks for them. */
1481 gpe_context->sampler.entry_size = 0;
1482 gpe_context->sampler.max_entries = 0;
1484 if (kernel_param->sampler_size) {
1485 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486 gpe_context->sampler.max_entries = 1;
1489 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
/* Binding table: 4 bytes per entry, surface states padded and 64-aligned. */
1492 gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493 gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495 gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
/* Thread count scales with the detected EU total; the 112 value is a
 * fallback when EU info is unavailable (16 EU * 7 threads). */
1497 if (i965->intel.eu_total > 0)
1498 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1500 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
/* URB budget: MAX_URB_SIZE registers minus CURBE and IDRT consumption,
 * divided by the per-entry size; clamped to the legal 1..127 range. */
1502 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505 gpe_context->vfe_state.curbe_allocation_size -
1506 ((gpe_context->idrt.entry_size >> 5) *
1507 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509 gpe_context->vfe_state.gpgpu_mode = 0;
/*
 * gen9_init_vfe_scoreboard_avc:
 * Program the MEDIA_VFE_STATE hardware scoreboard (thread dependency)
 * descriptors. The delta_x/delta_y values are 4-bit two's-complement
 * offsets to neighbouring macroblock threads (0xF = -1, 0xE = -2).
 * When walkpat_flag is set, a fixed 4-dependency walking-pattern mask
 * (0x0F, non-stalling type 1) overrides the caller-supplied values —
 * presumably matching a specific walker pattern; confirm against the
 * walker setup.
 * NOTE(review): elided extraction — the if/else structure's braces and
 * some alternative branches are not visible (line numbers jump,
 * e.g. 1534->1537, 1558->1561).
 */
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514 struct encoder_scoreboard_parameter *scoreboard_param)
1516 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
/* Walking-pattern mode: fixed mask/type and fixed dependency deltas. */
1520 if (scoreboard_param->walkpat_flag) {
1521 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522 gpe_context->vfe_desc5.scoreboard0.type = 1;
1524 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1525 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1527 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1528 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1530 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1531 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1533 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1534 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
/* Default (non-walkpat) deltas: left (-1,0), top (0,-1), top-right
 * (1,-1) and top-left (-1,-1) neighbour macroblocks. */
1537 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1538 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1541 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1542 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1545 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1546 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1549 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1550 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
/* Additional dependencies in scoreboard2 (deltas 4..7); the surrounding
 * condition selecting these values is elided from this view. */
1553 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1554 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1557 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1558 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1561 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1562 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1565 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1566 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1570 VME pipeline related function
1574 scaling kernel related function
/*
 * gen9_avc_set_curbe_scaling4x:
 * Fill the CURBE (constant data) for the Gen9 4x downscaling kernel:
 * input frame dimensions, binding-table indices for the source and
 * destination Y planes, and the optional MB statistics outputs
 * (flatness check / variance / pixel average), which share one
 * MBVPROCSTATS output surface.
 * NOTE(review): elided extraction — the `void *param` parameter, the
 * NULL-check after map_curbe and the braces are not visible (embedded
 * line numbers jump, e.g. 1586->1591).
 */
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578 struct encode_state *encode_state,
1579 struct i965_gpe_context *gpe_context,
1580 struct intel_encoder_context *encoder_context,
1583 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584 struct scaling_param *surface_param = (struct scaling_param *)param;
1586 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1591 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1593 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1594 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1596 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1600 curbe_cmd->dw5.flatness_threshold = 128;
1601 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
/* Any MB statistic output needs the shared MBVPROCSTATS destination BTI. */
1605 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606 curbe_cmd->dw7.enable_mb_variance_output ||
1607 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1611 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen95_avc_set_curbe_scaling4x:
 * Gen9.5 variant of the 4x-downscale CURBE setup. Differs from the Gen9
 * version in CURBE layout: all statistic enables live in dw6 (including
 * an extra 8x8 block statistics flag), the MBVPROCSTATS BTI moves to
 * dw8, and the flatness threshold is only written when the flatness
 * check is enabled.
 * NOTE(review): elided extraction — the `void *param` parameter, the
 * NULL-check after map_curbe and the braces are not visible.
 */
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617 struct encode_state *encode_state,
1618 struct i965_gpe_context *gpe_context,
1619 struct intel_encoder_context *encoder_context,
1622 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623 struct scaling_param *surface_param = (struct scaling_param *)param;
1625 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1630 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1632 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1633 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1635 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
/* Unlike the Gen9 variant, the threshold is conditional on the check. */
1638 if (surface_param->enable_mb_flatness_check)
1639 curbe_cmd->dw5.flatness_threshold = 128;
1640 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
/* Any MB statistic output needs the shared MBVPROCSTATS destination BTI. */
1645 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646 curbe_cmd->dw6.enable_mb_variance_output ||
1647 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1651 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_set_curbe_scaling2x:
 * Fill the CURBE for the 2x downscaling kernel (used for the 32x HME
 * level, applied on the already 16x-scaled surface). Only input
 * dimensions and the source/destination Y BTIs are needed — the 2x
 * kernel produces no MB statistics.
 * NOTE(review): elided extraction — the `void *param` parameter, the
 * NULL-check after map_curbe and the braces are not visible.
 */
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct i965_gpe_context *gpe_context,
1659 struct intel_encoder_context *encoder_context,
1662 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663 struct scaling_param *surface_param = (struct scaling_param *)param;
1665 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1670 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1672 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1673 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1675 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1678 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_scaling:
 * Bind the surfaces the scaling kernel reads/writes: the input and
 * output Y planes (as R32/R16/R8 UNORM depending on the requested
 * output format), plus either an MB statistics buffer or a flatness
 * check surface. Gen8 and Gen9 use different binding-table indices for
 * the statistics outputs.
 * NOTE(review): elided extraction — the `void *param` parameter, the
 * else before the Gen9 branch and most closing braces are not visible
 * (embedded line numbers jump, e.g. 1729->1732).
 */
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684 struct encode_state *encode_state,
1685 struct i965_gpe_context *gpe_context,
1686 struct intel_encoder_context *encoder_context,
1689 struct scaling_param *surface_param = (struct scaling_param *)param;
1690 struct i965_driver_data *i965 = i965_driver_data(ctx);
1691 unsigned int surface_format;
1692 unsigned int res_size;
/* Output format selection: 32-bit and 16-bit UNORM are mutually
 * exclusive flags; the fallback is plain 8-bit UNORM. */
1694 if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695 surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696 else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697 surface_format = I965_SURFACEFORMAT_R16_UNORM;
1699 surface_format = I965_SURFACEFORMAT_R8_UNORM;
/* Source and destination Y planes at their fixed BTIs. */
1701 i965_add_2d_gpe_surface(ctx, gpe_context,
1702 surface_param->input_surface,
1703 0, 1, surface_format,
1704 GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1706 i965_add_2d_gpe_surface(ctx, gpe_context,
1707 surface_param->output_surface,
1708 0, 1, surface_format,
1709 GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1711 /*add buffer mv_proc_stat, here need change*/
/* Gen8 path: statistics buffer / flatness surface at Gen8 CM indices.
 * res_size: 16 dwords of statistics per 16x16 macroblock. */
1712 if (IS_GEN8(i965->intel.device_info)) {
1713 if (surface_param->mbv_proc_stat_enabled) {
1714 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1716 i965_add_buffer_gpe_surface(ctx,
1718 surface_param->pres_mbv_proc_stat_buffer,
1722 GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1724 if (surface_param->enable_mb_flatness_check) {
1725 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726 surface_param->pres_flatness_check_surface,
1728 I965_SURFACEFORMAT_R8_UNORM,
1729 GEN8_SCALING_FRAME_FLATNESS_DST_CM);
/* Gen9 path (else branch elided from view): both outputs share the
 * GEN9 MBVPROCSTATS destination index, stats buffer taking priority. */
1732 if (surface_param->mbv_proc_stat_enabled) {
1733 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1735 i965_add_buffer_gpe_surface(ctx,
1737 surface_param->pres_mbv_proc_stat_buffer,
1741 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742 } else if (surface_param->enable_mb_flatness_check) {
1743 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744 surface_param->pres_flatness_check_surface,
1746 I965_SURFACEFORMAT_R8_UNORM,
1747 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
/*
 * gen9_avc_kernel_scaling:
 * Run one level of hierarchical-ME downscaling (4x, 16x or 32x).
 * Selects input/output surfaces and frame sizes per HME level, sets up
 * the matching CURBE (2x kernel for the 32x level — applied on the 16x
 * surface — 4x kernel otherwise), binds surfaces, and dispatches the
 * kernel with a dependency-free media object walker.
 * NOTE(review): elided extraction — the `switch (hme_level)`-style
 * header, `kernel_idx` declaration, case braces/breaks and the default
 * case are not visible (embedded line numbers jump, e.g. 1780->1782,
 * 1852->1860).
 */
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755 struct encode_state *encode_state,
1756 struct intel_encoder_context *encoder_context,
1759 struct i965_driver_data *i965 = i965_driver_data(ctx);
1760 struct i965_gpe_table *gpe = &i965->gpe_table;
1761 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1767 struct i965_gpe_context *gpe_context;
1768 struct scaling_param surface_param;
1769 struct object_surface *obj_surface;
1770 struct gen9_surface_avc *avc_priv_surface;
1771 struct gpe_media_object_walker_parameter media_object_walker_param;
1772 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774 int media_function = 0;
/* The scaled surfaces hang off the reconstructed surface's private data. */
1777 obj_surface = encode_state->reconstructed_object;
1778 avc_priv_surface = obj_surface->private_data;
1780 memset(&surface_param, 0, sizeof(struct scaling_param));
/* 4x level: source YUV -> 4x surface; the only level that can emit MB
 * flatness/variance/average statistics. */
1782 case INTEL_ENC_HME_4x : {
1783 media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1788 surface_param.input_surface = encode_state->input_yuv_object ;
1789 surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790 surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1792 surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793 surface_param.output_frame_width = generic_state->frame_width_4x ;
1794 surface_param.output_frame_height = generic_state->frame_height_4x ;
1796 surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797 surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798 surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1800 surface_param.blk8x8_stat_enabled = 0 ;
1801 surface_param.use_4x_scaling = 1 ;
1802 surface_param.use_16x_scaling = 0 ;
1803 surface_param.use_32x_scaling = 0 ;
/* 16x level: 4x surface -> 16x surface, same 4x kernel, no statistics. */
1806 case INTEL_ENC_HME_16x : {
1807 media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809 downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810 downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1812 surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813 surface_param.input_frame_width = generic_state->frame_width_4x ;
1814 surface_param.input_frame_height = generic_state->frame_height_4x ;
1816 surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817 surface_param.output_frame_width = generic_state->frame_width_16x ;
1818 surface_param.output_frame_height = generic_state->frame_height_16x ;
1820 surface_param.enable_mb_flatness_check = 0 ;
1821 surface_param.enable_mb_variance_output = 0 ;
1822 surface_param.enable_mb_pixel_average_output = 0 ;
1824 surface_param.blk8x8_stat_enabled = 0 ;
1825 surface_param.use_4x_scaling = 0 ;
1826 surface_param.use_16x_scaling = 1 ;
1827 surface_param.use_32x_scaling = 0 ;
/* 32x level: 16x surface -> 32x surface using the 2x kernel. */
1831 case INTEL_ENC_HME_32x : {
1832 media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833 kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834 downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835 downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1837 surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838 surface_param.input_frame_width = generic_state->frame_width_16x ;
1839 surface_param.input_frame_height = generic_state->frame_height_16x ;
1841 surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842 surface_param.output_frame_width = generic_state->frame_width_32x ;
1843 surface_param.output_frame_height = generic_state->frame_height_32x ;
1845 surface_param.enable_mb_flatness_check = 0 ;
1846 surface_param.enable_mb_variance_output = 0 ;
1847 surface_param.enable_mb_pixel_average_output = 0 ;
1849 surface_param.blk8x8_stat_enabled = 0 ;
1850 surface_param.use_4x_scaling = 0 ;
1851 surface_param.use_16x_scaling = 0 ;
1852 surface_param.use_32x_scaling = 1 ;
/* Pick the GPE context for the chosen kernel and reset its state. */
1860 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1862 gpe->context_init(ctx, gpe_context);
1863 gpe->reset_binding_table(ctx, gpe_context);
/* 32x uses the 2x-kernel CURBE; 4x/16x use the 4x-kernel CURBE. */
1865 if (surface_param.use_32x_scaling) {
1866 generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1868 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* Output surface format: R16_UNORM for the 2x kernel, R32_UNORM
 * otherwise (consumed by gen9_avc_send_surface_scaling). */
1871 if (surface_param.use_32x_scaling) {
1872 surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873 surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1875 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
/* Only the 4x pass emits statistics: prefer the combined MB status
 * buffer when supported, else fall back to the flatness-check surface. */
1879 if (surface_param.use_4x_scaling) {
1880 if (avc_state->mb_status_supported) {
1881 surface_param.enable_mb_flatness_check = 0;
1882 surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883 surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1886 surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887 surface_param.mbv_proc_stat_enabled = 0 ;
1888 surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1892 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1894 /* setup the interface data */
1895 gpe->setup_interface_data(ctx, gpe_context);
/* Walker resolution: the 2x kernel works per-MB; the 4x kernel works on
 * 8x8 blocks, hence the x2 factor. No thread dependencies. */
1897 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898 if (surface_param.use_32x_scaling) {
1899 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1902 /* the scaling is based on 8x8 blk level */
1903 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1906 kernel_walker_param.no_dependency = 1;
1908 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1910 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1913 &media_object_walker_param);
1915 return VA_STATUS_SUCCESS;
1919 frame/mb brc related function
/*
 * gen9_avc_init_mfx_avc_img_state:
 * Build a default Gen9 MFX_AVC_IMG_STATE command from the current
 * sequence/picture parameters. Per-PAK-pass fields (macroblock_stat_enable,
 * non_first_pass_flag, mb_rate_ctrl_flag) are later patched by the
 * gen9_avc_set_image_state* helpers.
 * NOTE(review): elided extraction — return type, braces and the else
 * around dw5.aq_rounding are not visible (embedded line numbers jump,
 * e.g. 1980->1982).
 */
1922 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1923 struct encode_state *encode_state,
1924 struct intel_encoder_context *encoder_context,
1925 struct gen9_mfx_avc_img_state *pstate)
1927 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1928 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1929 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1931 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1932 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1934 memset(pstate, 0, sizeof(*pstate));
/* Command header; dword_length is the total dword count minus 2. */
1936 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1937 pstate->dw0.sub_opcode_b = 0;
1938 pstate->dw0.sub_opcode_a = 0;
1939 pstate->dw0.command_opcode = 1;
1940 pstate->dw0.pipeline = 2;
1941 pstate->dw0.command_type = 3;
/* Frame geometry in macroblocks. */
1943 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1945 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1946 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
/* Picture-level flags mirrored from the VA picture parameters. */
1948 pstate->dw3.image_structure = 0;//frame is zero
1949 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1950 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1951 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1952 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1953 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
/* Progressive-frame coding tools from the VA sequence parameters. */
1955 pstate->dw4.field_picture_flag = 0;
1956 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1957 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1958 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1959 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1960 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1961 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1962 pstate->dw4.mb_mv_format_flag = 1;
1963 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1964 pstate->dw4.mv_unpacked_flag = 1;
1965 pstate->dw4.insert_test_flag = 0;
1966 pstate->dw4.load_slice_pointer_flag = 0;
1967 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1968 pstate->dw4.minimum_frame_size = 0;
/* Multi-pass PAK control defaults; trellis quantization (AQ) is only
 * enabled with CABAC and when the state machine requests it. */
1969 pstate->dw5.intra_mb_max_bit_flag = 1;
1970 pstate->dw5.inter_mb_max_bit_flag = 1;
1971 pstate->dw5.frame_size_over_flag = 1;
1972 pstate->dw5.frame_size_under_flag = 1;
1973 pstate->dw5.intra_mb_ipcm_flag = 1;
1974 pstate->dw5.mb_rate_ctrl_flag = 0;
1975 pstate->dw5.non_first_pass_flag = 0;
1976 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1977 pstate->dw5.aq_chroma_disable = 1;
1978 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1979 pstate->dw5.aq_enable = avc_state->tq_enable;
1980 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1982 pstate->dw5.aq_rounding = 0;
/* MB size limits and per-slice delta-QP bounds (all zero = unused). */
1985 pstate->dw6.intra_mb_max_size = 2700;
1986 pstate->dw6.inter_mb_max_size = 4095;
1988 pstate->dw8.slice_delta_qp_max0 = 0;
1989 pstate->dw8.slice_delta_qp_max1 = 0;
1990 pstate->dw8.slice_delta_qp_max2 = 0;
1991 pstate->dw8.slice_delta_qp_max3 = 0;
1993 pstate->dw9.slice_delta_qp_min0 = 0;
1994 pstate->dw9.slice_delta_qp_min1 = 0;
1995 pstate->dw9.slice_delta_qp_min2 = 0;
1996 pstate->dw9.slice_delta_qp_min3 = 0;
/* Frame bitrate window: min 0, max at the 14-bit field ceiling. */
1998 pstate->dw10.frame_bitrate_min = 0;
1999 pstate->dw10.frame_bitrate_min_unit = 1;
2000 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2001 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2002 pstate->dw10.frame_bitrate_max_unit = 1;
2003 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2005 pstate->dw11.frame_bitrate_min_delta = 0;
2006 pstate->dw11.frame_bitrate_max_delta = 0;
2008 pstate->dw12.vad_error_logic = 1;
2009 /* set paramters DW19/DW20 for slices */
/*
 * gen8_avc_init_mfx_avc_img_state:
 * Gen8 counterpart of gen9_avc_init_mfx_avc_img_state. Same defaults,
 * but with the Gen8 dw0 field names, Gen8-specific dw3
 * inter/intra_mb_conf_flag bits, and no dw12 vad_error_logic field.
 * Per-PAK-pass fields are later patched by gen8_avc_set_image_state.
 * NOTE(review): elided extraction — return type, braces and the else
 * around dw5.aq_rounding are not visible.
 */
2013 gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
2014 struct encode_state *encode_state,
2015 struct intel_encoder_context *encoder_context,
2016 struct gen8_mfx_avc_img_state *pstate)
2018 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2019 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2020 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2022 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
2023 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2025 memset(pstate, 0, sizeof(*pstate));
/* Command header; dword_length is the total dword count minus 2. */
2027 pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
2028 pstate->dw0.command_sub_opcode_b = 0;
2029 pstate->dw0.command_sub_opcode_a = 0;
2030 pstate->dw0.command_opcode = 1;
2031 pstate->dw0.command_pipeline = 2;
2032 pstate->dw0.command_type = 3;
/* Frame geometry in macroblocks. */
2034 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
2036 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
2037 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
/* Picture-level flags; the conf flags get enabled for non-first PAK
 * passes in gen8_avc_set_image_state. */
2039 pstate->dw3.image_structure = 0;//frame is zero
2040 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
2041 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
2042 pstate->dw3.inter_mb_conf_flag = 0;
2043 pstate->dw3.intra_mb_conf_flag = 0;
2044 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
2045 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
/* Progressive-frame coding tools from the VA sequence parameters. */
2047 pstate->dw4.field_picture_flag = 0;
2048 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
2049 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
2050 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
2051 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
2052 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2053 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
2054 pstate->dw4.mb_mv_format_flag = 1;
2055 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
2056 pstate->dw4.mv_unpacked_flag = 1;
2057 pstate->dw4.insert_test_flag = 0;
2058 pstate->dw4.load_slice_pointer_flag = 0;
2059 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
2060 pstate->dw4.minimum_frame_size = 0;
/* Multi-pass PAK control defaults; trellis quantization (AQ) only with
 * CABAC and when the state machine requests it. */
2061 pstate->dw5.intra_mb_max_bit_flag = 1;
2062 pstate->dw5.inter_mb_max_bit_flag = 1;
2063 pstate->dw5.frame_size_over_flag = 1;
2064 pstate->dw5.frame_size_under_flag = 1;
2065 pstate->dw5.intra_mb_ipcm_flag = 1;
2066 pstate->dw5.mb_rate_ctrl_flag = 0;
2067 pstate->dw5.non_first_pass_flag = 0;
2068 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
2069 pstate->dw5.aq_chroma_disable = 1;
2070 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
2071 pstate->dw5.aq_enable = avc_state->tq_enable;
2072 pstate->dw5.aq_rounding = avc_state->tq_rounding;
2074 pstate->dw5.aq_rounding = 0;
/* MB size limits and per-slice delta-QP bounds (all zero = unused). */
2077 pstate->dw6.intra_mb_max_size = 2700;
2078 pstate->dw6.inter_mb_max_size = 4095;
2080 pstate->dw8.slice_delta_qp_max0 = 0;
2081 pstate->dw8.slice_delta_qp_max1 = 0;
2082 pstate->dw8.slice_delta_qp_max2 = 0;
2083 pstate->dw8.slice_delta_qp_max3 = 0;
2085 pstate->dw9.slice_delta_qp_min0 = 0;
2086 pstate->dw9.slice_delta_qp_min1 = 0;
2087 pstate->dw9.slice_delta_qp_min2 = 0;
2088 pstate->dw9.slice_delta_qp_min3 = 0;
/* Frame bitrate window: min 0, max at the 14-bit field ceiling. */
2090 pstate->dw10.frame_bitrate_min = 0;
2091 pstate->dw10.frame_bitrate_min_unit = 1;
2092 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2093 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2094 pstate->dw10.frame_bitrate_max_unit = 1;
2095 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2097 pstate->dw11.frame_bitrate_min_delta = 0;
2098 pstate->dw11.frame_bitrate_max_delta = 0;
2099 /* set paramters DW19/DW20 for slices */
/*
 * gen9_avc_set_image_state:
 * Write one MFX_AVC_IMG_STATE command + MI_BATCH_BUFFER_END per PAK
 * pass into the BRC image-state GPE buffer, each slot of size
 * INTEL_AVC_IMAGE_STATE_CMD_SIZE. Pass 0 keeps the first-pass defaults;
 * later passes enable MB statistics and the non-first-pass flag.
 * NOTE(review): elided extraction — the `pdata` declaration, its
 * NULL-check, the per-pass `if (i == 0)`/else lines and braces are not
 * visible (embedded line numbers jump, e.g. 2119->2122).
 */
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102 struct encode_state *encode_state,
2103 struct intel_encoder_context *encoder_context,
2104 struct i965_gpe_resource *gpe_resource)
2106 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2110 unsigned int * data;
2111 struct gen9_mfx_avc_img_state cmd;
2113 pdata = i965_map_gpe_resource(gpe_resource);
/* Build the common command once, then specialize it per pass. */
2118 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119 for (i = 0; i < generic_state->num_pak_passes; i++) {
/* First pass: stats off, first-pass flag; later passes: stats on. */
2122 cmd.dw4.macroblock_stat_enable = 0;
2123 cmd.dw5.non_first_pass_flag = 0;
2125 cmd.dw4.macroblock_stat_enable = 1;
2126 cmd.dw5.non_first_pass_flag = 1;
2127 cmd.dw5.intra_mb_ipcm_flag = 1;
2130 cmd.dw5.mb_rate_ctrl_flag = 0;
/* Copy the command and terminate the slot with MI_BATCH_BUFFER_END so
 * each slot can be executed as a second-level batch. */
2131 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133 *data = MI_BATCH_BUFFER_END;
2135 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2137 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen8_avc_set_image_state:
 * Gen8 counterpart of gen9_avc_set_image_state. Identical per-pass
 * slot layout; additionally sets the Gen8-specific dw3
 * inter/intra_mb_conf_flag bits on non-first passes.
 * NOTE(review): elided extraction — the `pdata` declaration, its
 * NULL-check, the per-pass `if (i == 0)`/else lines and braces are not
 * visible.
 */
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142 struct encode_state *encode_state,
2143 struct intel_encoder_context *encoder_context,
2144 struct i965_gpe_resource *gpe_resource)
2146 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2150 unsigned int * data;
2151 struct gen8_mfx_avc_img_state cmd;
2153 pdata = i965_map_gpe_resource(gpe_resource);
/* Build the common command once, then specialize it per pass. */
2158 gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159 for (i = 0; i < generic_state->num_pak_passes; i++) {
/* First pass: stats off, first-pass flag; later passes: stats plus the
 * Gen8 inter/intra MB conformance flags. */
2162 cmd.dw4.macroblock_stat_enable = 0;
2163 cmd.dw5.non_first_pass_flag = 0;
2165 cmd.dw4.macroblock_stat_enable = 1;
2166 cmd.dw5.non_first_pass_flag = 1;
2167 cmd.dw5.intra_mb_ipcm_flag = 1;
2168 cmd.dw3.inter_mb_conf_flag = 1;
2169 cmd.dw3.intra_mb_conf_flag = 1;
2171 cmd.dw5.mb_rate_ctrl_flag = 0;
/* Copy the command and terminate the slot with MI_BATCH_BUFFER_END. */
2172 memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173 data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174 *data = MI_BATCH_BUFFER_END;
2176 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2178 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_set_image_state_non_brc:
 * Non-BRC variant: writes a single MFX_AVC_IMG_STATE + MI_BATCH_BUFFER_END
 * for the current PAK pass only (curr_pak_pass), instead of one slot per
 * pass. Note non_first_pass_flag stays 0 even on later passes here,
 * unlike the BRC variant — presumably intentional for CQP mode; confirm
 * against the PAK pipeline.
 * NOTE(review): elided extraction — the `pdata` declaration, its
 * NULL-check, the else line and braces are not visible (embedded line
 * numbers jump, e.g. 2203->2206).
 */
2182 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
2183 struct encode_state *encode_state,
2184 struct intel_encoder_context *encoder_context,
2185 struct i965_gpe_resource *gpe_resource)
2187 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2188 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2191 unsigned int * data;
2192 struct gen9_mfx_avc_img_state cmd;
2194 pdata = i965_map_gpe_resource(gpe_resource);
2199 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
/* First pass keeps stats off; later passes enable MB statistics. */
2201 if (generic_state->curr_pak_pass == 0) {
2202 cmd.dw4.macroblock_stat_enable = 0;
2203 cmd.dw5.non_first_pass_flag = 0;
2206 cmd.dw4.macroblock_stat_enable = 1;
2207 cmd.dw5.non_first_pass_flag = 0;
2208 cmd.dw5.intra_mb_ipcm_flag = 1;
2211 cmd.dw5.mb_rate_ctrl_flag = 0;
/* Copy the command and terminate it with MI_BATCH_BUFFER_END. */
2212 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2213 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2214 *data = MI_BATCH_BUFFER_END;
2216 i965_unmap_gpe_resource(gpe_resource);
/*
 * Build the per-QP trellis-quantization lambda table for Gen9.5
 * (KBL/GLK/CNL-class) in avc_state->lamda_value_lut.
 *
 * Each 32-bit entry packs the intra lambda in the upper 16 bits and the
 * inter lambda in the lower 16 bits.  The table is first seeded from the
 * per-frame-type constant tables, then sentinel values (0xfffa intra,
 * 0xffef inter) are patched with rounding values: the intra default, or
 * the P/B inter rounding chosen from explicit state or per-preset tables.
 *
 * NOTE(review): interior lines are missing from this extract (the
 * `static void` return-type line, case labels, several braces and the
 * declarations of `i`/`col`) — comments cover only visible statements.
 */
2221 gen95_avc_calc_lambda_table(VADriverContextP ctx,
2222                             struct encode_state *encode_state,
2223                             struct intel_encoder_context *encoder_context)
2225     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2226     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2227     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2228     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2229     unsigned int value, inter, intra;
2230     unsigned int rounding_value = 0;
2231     unsigned int size = 0;
2234     unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
     /* AVC_QP_MAX rows x 2 columns of 32-bit packed lambdas. */
2240     size = AVC_QP_MAX * 2 * sizeof(unsigned int);
     /* Seed the LUT from the constant table matching the slice type. */
2241     switch (generic_state->frame_type) {
2243         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
2246         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
2249         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
     /* Patch sentinel entries with the effective rounding values. */
2256     for (i = 0; i < AVC_QP_MAX ; i++) {
2257         for (col = 0; col < 2; col++) {
2258             value = *(lambda_table + i * 2 + col);
             /* Upper half-word: intra lambda. */
2259             intra = value >> 16;
2261             if (intra < GEN95_AVC_MAX_LAMBDA) {
                 /* 0xfffa is the "use default intra rounding" sentinel. */
2262                 if (intra == 0xfffa) {
2263                     intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
2267             intra = intra << 16;
             /* Lower half-word: inter lambda. */
2268             inter = value & 0xffff;
2270             if (inter < GEN95_AVC_MAX_LAMBDA) {
                 /* 0xffef is the "use inter rounding" sentinel; pick the
                  * rounding from explicit state if set, else the per-preset
                  * table, distinguishing P, referenced-B and plain B. */
2271                 if (inter == 0xffef) {
2272                     if (generic_state->frame_type == SLICE_TYPE_P) {
2273                         if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
2274                             rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
2276                             rounding_value = avc_state->rounding_inter_p;
2277                     } else if (generic_state->frame_type == SLICE_TYPE_B) {
2278                         if (pic_param->pic_fields.bits.reference_pic_flag) {
2279                             if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
2280                                 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
2282                                 rounding_value = avc_state->rounding_inter_b_ref;
2284                             if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
2285                                 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
2287                                 rounding_value = avc_state->rounding_inter_b;
2291                 inter = 0xf000 + rounding_value;
             /* Re-pack intra (high) + inter (low) into the LUT entry. */
2293             *(lambda_table + i * 2 + col) = intra + inter;
/*
 * Populate the BRC constant-data surface (res_brc_const_data_buffer) for
 * the Gen9+ multi-reference path.  The surface is laid out as a sequence
 * of tables consumed by the BRC kernel:
 *   1. QP adjustment / distortion-threshold / max-frame-threshold tables
 *   2. skip-value thresholds (per slice type, transform and skip mode)
 *   3. per-reference-list QP areas (32+32+32+160 bytes)
 *   4. MV cost + mode cost (optionally patched with old intra mode cost
 *      and FTQ skip thresholds)
 *   5. ref cost table
 *   6. intra scaling factors (adaptive or fixed)
 *   7. on KBL/GLK/GEN10 only: Gen9.5 lambda data and FTQ25 table
 *
 * NOTE(review): interior lines are missing from this extract (loop
 * variable declarations, `data += size;` advances between tables, case
 * labels, closing braces), so the per-table offsets cannot be verified
 * here; comments describe only the visible statements.
 */
2299 gen9_avc_init_brc_const_data(VADriverContextP ctx,
2300                              struct encode_state *encode_state,
2301                              struct intel_encoder_context *encoder_context)
2303     struct i965_driver_data *i965 = i965_driver_data(ctx);
2304     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2305     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2306     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2307     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2309     struct i965_gpe_resource *gpe_resource = NULL;
2310     unsigned char * data = NULL;
2311     unsigned char * data_tmp = NULL;
2312     unsigned int size = 0;
2313     unsigned int table_idx = 0;
2314     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2317     struct object_surface *obj_surface;
2318     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2319     VASurfaceID surface_id;
2320     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2322     gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2323     assert(gpe_resource);
     /* Start from a zeroed surface so unwritten regions are deterministic. */
2325     i965_zero_gpe_resource(gpe_resource);
2327     data = i965_map_gpe_resource(gpe_resource);
     /* Map I/P/B frame type to the row index used by the constant tables. */
2330     table_idx = slice_type_kernel[generic_state->frame_type];
2332     /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2333     size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2334     memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2338     /* skip threshold table*/
2340     switch (generic_state->frame_type) {
2342         memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2345         memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2348         /*SLICE_TYPE_I,no change */
     /* Application-supplied non-FTQ skip thresholds override every other
      * 16-bit entry's high byte (stride 2, offset 1). */
2352     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2353         for (i = 0; i < AVC_QP_MAX ; i++) {
2354             *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2359     /*fill the qp for ref list*/
     /* Layout: 32 bytes list0, 32 reserved, 32 bytes list1, 160 tail.
      * The two 0xff memsets mark the list0/list1 areas as "unused". */
2360     size = 32 + 32 + 32 + 160;
2361     memset(data, 0xff, 32);
2362     memset(data + 32 + 32, 0xff, 32);
2363     switch (generic_state->frame_type) {
2364     case SLICE_TYPE_P: {
2365         for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2366             surface_id = slice_param->RefPicList0[i].picture_id;
2367             obj_surface = SURFACE(surface_id);
2370             *(data + i) = avc_state->list_ref_idx[0][i];//?
2374     case SLICE_TYPE_B: {
         /* B slices write list1 first (at +64), then rewind for list0. */
2375         data = data + 32 + 32;
2376         for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
2377             surface_id = slice_param->RefPicList1[i].picture_id;
2378             obj_surface = SURFACE(surface_id);
2381             *(data + i) = avc_state->list_ref_idx[1][i];//?
2384         data = data - 32 - 32;
2386         for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2387             surface_id = slice_param->RefPicList0[i].picture_id;
2388             obj_surface = SURFACE(surface_id);
2391             *(data + i) = avc_state->list_ref_idx[0][i];//?
2396         /*SLICE_TYPE_I,no change */
2401     /*mv cost and mode cost*/
2403     memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
     /* Legacy intra mode cost: patch dword 3 of each 32-byte QP row.
      * NOTE(review): the data_tmp initialization/advance lines are not
      * visible in this extract. */
2405     if (avc_state->old_mode_cost_enable) {
2407         for (i = 0; i < AVC_QP_MAX ; i++) {
2408             *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
     /* Application-supplied FTQ skip thresholds fan out to bytes
      * 24,25,27-31 of each 32-byte QP row (byte 26 is skipped). */
2413     if (avc_state->ftq_skip_threshold_lut_input_enable) {
2414         for (i = 0; i < AVC_QP_MAX ; i++) {
2415             *(data + (i * 32) + 24) =
2416                 *(data + (i * 32) + 25) =
2417                 *(data + (i * 32) + 27) =
2418                 *(data + (i * 32) + 28) =
2419                 *(data + (i * 32) + 29) =
2420                 *(data + (i * 32) + 30) =
2421                 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2429     memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
     /* Intra scaling factors: adaptive table when enabled, fixed otherwise. */
2434     if (avc_state->adaptive_intra_scaling_enable) {
2435         memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2437         memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
     /* Gen9.5-class hardware gets the extra lambda + FTQ25 tables. */
2440     if (IS_KBL(i965->intel.device_info) ||
2441         IS_GEN10(i965->intel.device_info) ||
2442         IS_GLK(i965->intel.device_info)) {
2446         memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
2450         memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
2453     i965_unmap_gpe_resource(gpe_resource);
/*
 * Legacy (single-reference) variant of gen9_avc_init_brc_const_data:
 * fills the BRC constant surface using the older Gen7.5-derived QP
 * adjustment and mode/MV cost tables, and leaves the ref-list QP area
 * untouched.  Selected when avc_state->multi_pre_enable is off (see
 * gen9_avc_kernel_brc_frame_update).
 *
 * NOTE(review): interior lines are missing from this extract (loop
 * variable declarations, `data += size;` advances, case labels, braces);
 * comments describe only the visible statements.
 */
2457 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2458                                  struct encode_state *encode_state,
2459                                  struct intel_encoder_context *encoder_context)
2461     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2462     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2463     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2464     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2466     struct i965_gpe_resource *gpe_resource = NULL;
2467     unsigned int * data = NULL;
2468     unsigned int * data_tmp = NULL;
2469     unsigned int size = 0;
2470     unsigned int table_idx = 0;
2471     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2472     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2475     gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2476     assert(gpe_resource);
     /* Zero the whole surface first so skipped tables stay deterministic. */
2478     i965_zero_gpe_resource(gpe_resource);
2480     data = i965_map_gpe_resource(gpe_resource);
2483     table_idx = slice_type_kernel[generic_state->frame_type];
2485     /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
     /* Legacy path uses the gen75 QP adjustment table instead of gen9's. */
2486     size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2487     memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2491     /* skip threshold table*/
2493     switch (generic_state->frame_type) {
2495         memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2498         memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2501         /*SLICE_TYPE_I,no change */
     /* Application-supplied non-FTQ skip thresholds (stride 2, offset 1).
      * NOTE(review): `data` is unsigned int* here (unlike the unsigned
      * char* in the non-legacy variant), so this indexing addresses
      * different bytes — flagging for confirmation, not changing. */
2505     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2506         for (i = 0; i < AVC_QP_MAX ; i++) {
2507             *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2512     /*fill the qp for ref list*/
2518     /*mv cost and mode cost*/
2520     memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
     /* Legacy intra mode cost: patch dword 3 of each QP row. */
2522     if (avc_state->old_mode_cost_enable) {
2524         for (i = 0; i < AVC_QP_MAX ; i++) {
2525             *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
     /* FTQ skip thresholds fan out to offsets 24,25,27-31 of each row. */
2530     if (avc_state->ftq_skip_threshold_lut_input_enable) {
2531         for (i = 0; i < AVC_QP_MAX ; i++) {
2532             *(data + (i * 32) + 24) =
2533                 *(data + (i * 32) + 25) =
2534                 *(data + (i * 32) + 27) =
2535                 *(data + (i * 32) + 28) =
2536                 *(data + (i * 32) + 29) =
2537                 *(data + (i * 32) + 30) =
2538                 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2546     memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2548     i965_unmap_gpe_resource(gpe_resource);
/*
 * Fill the CURBE for the BRC init/reset kernel: rate-control targets
 * (average/max bitrate, VBV buffer size and fullness), GOP structure,
 * frame geometry, per-rate-mode BRC flags, and the deviation-threshold
 * tables derived from the bits-per-frame-to-buffer ratio.
 *
 * NOTE(review): interior lines are missing from this extract (the
 * trailing parameter of the signature, early-return on NULL curbe,
 * several braces and blank separators); comments describe only the
 * visible statements.
 */
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2557     gen9_avc_brc_init_reset_curbe_data *cmd;
2558     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561     double input_bits_per_frame = 0;
2562     double bps_ratio = 0;
2563     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564     struct avc_param common_param;
2566     cmd = i965_gpe_context_map_curbe(gpe_context);
     /* Start from the canned default CURBE, then override fields below. */
2571     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2573     memset(&common_param, 0, sizeof(common_param));
2574     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578     common_param.frames_per_100s = generic_state->frames_per_100s;
2579     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580     common_param.target_bit_rate = generic_state->target_bit_rate;
     /* Profile/level ceiling on frame size, from the H.264 level tables. */
2582     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
     /* Bitrates are tracked in kbps in generic_state; kernel wants bps. */
2585     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
     /* GOP composition: count of P frames, remainder are B frames. */
2587     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591     cmd->dw12.no_slices = avc_state->slice_num;
     /* NOTE(review): `max_bit_rate = max_bit_rate` is a no-op self-assign;
      * likely a placeholder for a VUI-based recalculation — confirm intent. */
2594     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2596         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
     /* Frame rate as a rational: frames_per_100s / 100. */
2602     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2603     cmd->dw7.frame_rate_d = 100;
2604     cmd->dw8.brc_flag = 0;
     /* 0x8000 disables MB-level BRC inside the kernel. */
2605     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
     /* Per-rate-mode flags; CBR forces max == average. */
2608     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2610         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2613     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
         /* VBR requires max > average; double average if inconsistent. */
2615         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2618         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2620     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2622         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2626     //igonre icq/vcm/qvbr
2628     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
     /* Average bits per frame = max_rate * frame_rate_d / frame_rate_m. */
2632     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
     /* Derive sane VBV defaults when the app did not provide them. */
2634     if (cmd->dw2.buf_size_in_bits == 0) {
2635         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2638     if (cmd->dw1.init_buf_full_in_bits == 0) {
2639         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2641     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2644     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
     /* AVBR overrides the buffer model entirely. */
2649     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2651         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
     /* bps ratio (clamped to [0.1, 3.5]) scales all deviation thresholds. */
2655     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
     /* Deviation thresholds for P/B, VBR and I cases; negative values are
      * intentionally stored into unsigned bitfields (two's complement). */
2659     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2664     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2666     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2667     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2680     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2681     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2684     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2686     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the two surfaces consumed by the BRC init/reset kernel to the
 * GPE binding table: the BRC history buffer (raw buffer) and the ME
 * distortion surface (2D, R8_UNORM).
 *
 * NOTE(review): interior lines are missing (trailing signature
 * parameter, some call arguments such as the gpe_context and flags);
 * comments describe only the visible statements.
 */
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693                                      struct encode_state *encode_state,
2694                                      struct i965_gpe_context *gpe_context,
2695                                      struct intel_encoder_context *encoder_context,
2698     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
     /* BRC history buffer: read/write state carried across frames. */
2701     i965_add_buffer_gpe_surface(ctx,
2703                                 &avc_ctx->res_brc_history_buffer,
2705                                 avc_ctx->res_brc_history_buffer.size,
2707                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
     /* ME distortion surface, bound as a 2D R8 surface. */
2709     i965_add_buffer_2d_gpe_surface(ctx,
2711                                    &avc_ctx->res_brc_dist_data_surface,
2713                                    I965_SURFACEFORMAT_R8_UNORM,
2714                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/*
 * Run the BRC init (first time) or BRC reset (subsequent) kernel as a
 * single MEDIA_OBJECT: pick the kernel by generic_state->brc_inited,
 * program CURBE and surfaces through the generic_ctx callbacks, then
 * dispatch with empty inline data.  Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): interior lines are missing (opening brace, the
 * media_function argument line of the dispatch call); comments describe
 * only the visible statements.
 */
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721                                struct encode_state *encode_state,
2722                                struct intel_encoder_context *encoder_context)
2724     struct i965_driver_data *i965 = i965_driver_data(ctx);
2725     struct i965_gpe_table *gpe = &i965->gpe_table;
2726     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2731     struct i965_gpe_context *gpe_context;
2732     struct gpe_media_object_parameter media_object_param;
2733     struct gpe_media_object_inline_data media_object_inline_data;
2734     int media_function = 0;
2735     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2737     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
     /* After the first successful init, subsequent calls use RESET. */
2739     if (generic_state->brc_inited)
2740         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2742     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2744     gpe->context_init(ctx, gpe_context);
2745     gpe->reset_binding_table(ctx, gpe_context);
     /* CURBE + surface setup via the per-platform callbacks. */
2747     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2749     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2751     gpe->setup_interface_data(ctx, gpe_context);
     /* Single MEDIA_OBJECT with zeroed inline data. */
2753     memset(&media_object_param, 0, sizeof(media_object_param));
2754     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755     media_object_param.pinline_data = &media_object_inline_data;
2756     media_object_param.inline_size = sizeof(media_object_inline_data);
2758     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2761                                      &media_object_param);
2763     return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness, skip-frame accounting, frame type and BRC flags, min/max QP
 * clamps, AVBR growth-adjust thresholds, ROI enable, and the
 * profile/level max-frame ceiling.
 *
 * NOTE(review): interior lines are missing from this extract (the
 * trailing signature parameter, early return value at 2784, case
 * labels, closing braces); comments describe only the visible
 * statements.
 */
2767 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2768                                     struct encode_state *encode_state,
2769                                     struct i965_gpe_context *gpe_context,
2770                                     struct intel_encoder_context *encoder_context,
2773     gen9_avc_frame_brc_update_curbe_data *cmd;
2774     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2775     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2777     struct object_surface *obj_surface;
2778     struct gen9_surface_avc *avc_priv_surface;
2779     struct avc_param common_param;
2780     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
     /* The reconstructed surface carries the per-surface AVC private data
      * (reference flag, frame index) used below; bail out if absent. */
2782     obj_surface = encode_state->reconstructed_object;
2784     if (!obj_surface || !obj_surface->private_data)
2786     avc_priv_surface = obj_surface->private_data;
2788     cmd = i965_gpe_context_map_curbe(gpe_context);
     /* Start from the canned default CURBE. */
2793     memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
     /* Wrap the running target fullness into [0, buf_size) and record
      * whether a wrap (overflow) occurred this frame. */
2795     cmd->dw5.target_size_flag = 0 ;
2796     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2798         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2799         cmd->dw5.target_size_flag = 1 ;
     /* Account for application-skipped frames in the buffer model. */
2802     if (generic_state->skip_frame_enbale) {
2803         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2804         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2806         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2809     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2810     cmd->dw1.frame_number = generic_state->seq_frame_number ;
     /* Header bytes already emitted, converted to bits (<< 3). */
2811     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2812     cmd->dw5.cur_frame_type = generic_state->frame_type ;
2813     cmd->dw5.brc_flag = 0 ;
2814     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2816     if (avc_state->multi_pre_enable) {
2817         cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2818         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2821     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
     /* Optional per-slice-type QP clamps (case labels not visible). */
2822     if (avc_state->min_max_qp_enable) {
2823         switch (generic_state->frame_type) {
2825             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2826             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2829             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2830             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2833             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2834             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
         /* 0 means "no clamp" to the kernel. */
2838         cmd->dw6.minimum_qp = 0 ;
2839         cmd->dw6.maximum_qp = 0 ;
2841     cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2842     cmd->dw6.enable_sliding_window = 0 ;
     /* Advance the target fullness by one frame's worth of bits. */
2844     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
     /* AVBR: growth-adjust frame thresholds and rate-ratio bands scaled
      * by convergence/accuracy. */
2846     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2847         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2848         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2849         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2850         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2851         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2852         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2853         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2854         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2855         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2856         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2859     cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2861     memset(&common_param, 0, sizeof(common_param));
2862     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2863     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2864     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2865     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2866     common_param.frames_per_100s = generic_state->frames_per_100s;
2867     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2868     common_param.target_bit_rate = generic_state->target_bit_rate;
     /* Level-derived ceiling on compressed frame size. */
2870     cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2871     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces used by the frame-level BRC update kernel.  Binding
 * table indices differ between Gen9 (SKL/BXT/Gen8) and Gen9.5
 * (KBL/GLK/GEN10); `is_g95` selects the index set.  Surfaces include
 * BRC history, previous-PAK statistics, the image-state read/write
 * buffers, the MBEnc CURBE (either a dedicated BRC buffer or the MBEnc
 * context's dynamic state), the ME distortion surface, the BRC constant
 * table, and MB status / MB QP data.
 *
 * NOTE(review): interior lines are missing (trailing signature
 * parameter, `is_g95 = 0/1` assignment lines after the device checks,
 * several call arguments and braces); comments describe only the
 * visible statements.
 */
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878                                        struct encode_state *encode_state,
2879                                        struct i965_gpe_context *gpe_context,
2880                                        struct intel_encoder_context *encoder_context,
2883     struct i965_driver_data *i965 = i965_driver_data(ctx);
2884     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886     struct brc_param * param = (struct brc_param *)param_brc ;
2887     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2888     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889     unsigned char is_g95 = 0;
     /* Select the binding-table index family by hardware generation. */
2891     if (IS_SKL(i965->intel.device_info) ||
2892         IS_BXT(i965->intel.device_info) ||
2893         IS_GEN8(i965->intel.device_info))
2895     else if (IS_KBL(i965->intel.device_info) ||
2896              IS_GEN10(i965->intel.device_info) ||
2897              IS_GLK(i965->intel.device_info))
2900     /* brc history buffer*/
2901     i965_add_buffer_gpe_surface(ctx,
2903                                 &avc_ctx->res_brc_history_buffer,
2905                                 avc_ctx->res_brc_history_buffer.size,
2907                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2909     /* previous pak buffer*/
2910     i965_add_buffer_gpe_surface(ctx,
2912                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2914                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2916                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2918     /* image state command buffer read only*/
2919     i965_add_buffer_gpe_surface(ctx,
2921                                 &avc_ctx->res_brc_image_state_read_buffer,
2923                                 avc_ctx->res_brc_image_state_read_buffer.size,
2925                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2927     /* image state command buffer write only*/
2928     i965_add_buffer_gpe_surface(ctx,
2930                                 &avc_ctx->res_brc_image_state_write_buffer,
2932                                 avc_ctx->res_brc_image_state_write_buffer.size,
2934                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
     /* MBEnc CURBE exchange: Gen9.5 uses a dedicated BRC buffer; the
      * fallback path aliases the MBEnc dynamic-state BO for read+write. */
2936     if (avc_state->mbenc_brc_buffer_size > 0) {
2937         i965_add_buffer_gpe_surface(ctx,
2939                                     &(avc_ctx->res_mbenc_brc_buffer),
2941                                     avc_ctx->res_mbenc_brc_buffer.size,
2943                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2945         /* Mbenc curbe input buffer */
2946         gen9_add_dri_buffer_gpe_surface(ctx,
2948                                         gpe_context_mbenc->dynamic_state.bo,
2950                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2951                                         gpe_context_mbenc->curbe.offset,
2952                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953         /* Mbenc curbe output buffer */
2954         gen9_add_dri_buffer_gpe_surface(ctx,
2956                                         gpe_context_mbenc->dynamic_state.bo,
2958                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2959                                         gpe_context_mbenc->curbe.offset,
2960                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2963     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964     i965_add_buffer_2d_gpe_surface(ctx,
2966                                    &avc_ctx->res_brc_dist_data_surface,
2968                                    I965_SURFACEFORMAT_R8_UNORM,
2969                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2971     /* BRC const data 2D surface buffer */
2972     i965_add_buffer_2d_gpe_surface(ctx,
2974                                    &avc_ctx->res_brc_const_data_buffer,
2976                                    I965_SURFACEFORMAT_R8_UNORM,
2977                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2979     /* MB statistical data surface*/
2980     if (!IS_GEN8(i965->intel.device_info)) {
2981         i965_add_buffer_gpe_surface(ctx,
2983                                     &avc_ctx->res_mb_status_buffer,
2985                                     avc_ctx->res_mb_status_buffer.size,
2987                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
         /* NOTE(review): the MB QP surface reuses the MB_STATUS index here
          * (not an MB_QP-specific one) — flagging for confirmation against
          * the kernel's binding-table layout; else-branch lines are not
          * visible in this extract. */
2989         i965_add_buffer_2d_gpe_surface(ctx,
2991                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2993                                        I965_SURFACEFORMAT_R8_UNORM,
2994                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
/*
 * Dispatch the frame-level BRC update kernel.  Steps:
 *   1. Select and CURBE-program the MBEnc kernel (its CURBE is read and
 *      rewritten by the BRC kernel on the GPU).
 *   2. Program the BRC frame-update CURBE and surfaces.
 *   3. Refresh the BRC constant table (multi-ref or legacy layout) and
 *      construct the image-state read buffer (Gen8 vs. Gen9 variant).
 *   4. Run one MEDIA_OBJECT with empty inline data.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): interior lines are missing from this extract (opening
 * brace, `kernel_idx` declaration, kernel-offset adjustments for P/B at
 * 3056-3058, closing braces, the media_function argument of the
 * dispatch); comments describe only the visible statements.
 */
3000 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
3001                                  struct encode_state *encode_state,
3002                                  struct intel_encoder_context *encoder_context)
3005     struct i965_driver_data *i965 = i965_driver_data(ctx);
3006     struct i965_gpe_table *gpe = &i965->gpe_table;
3007     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3008     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3009     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3010     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3011     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3013     struct i965_gpe_context *gpe_context = NULL;
3014     struct gpe_media_object_parameter media_object_param;
3015     struct gpe_media_object_inline_data media_object_inline_data;
3016     int media_function = 0;
3018     unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
3019     unsigned int brc_enabled = 0;
3020     unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
     /* Dirty-ROI is compiled out: the `&& (0)` forces this to 0. */
3021     unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3023     /* the following set the mbenc curbe*/
3024     struct mbenc_param curbe_mbenc_param ;
3025     struct brc_param curbe_brc_param ;
     /* Which optional MBEnc input surfaces are in play this frame. */
3027     mb_const_data_buffer_in_use =
3028         generic_state->mb_brc_enabled ||
3031         avc_state->mb_qp_data_enable ||
3032         avc_state->rolling_intra_refresh_enable;
3033     mb_qp_buffer_in_use =
3034         generic_state->mb_brc_enabled ||
3035         generic_state->brc_roi_enable ||
3036         avc_state->mb_qp_data_enable;
     /* Base MBEnc kernel by quality/performance mode; the P/B slice
      * offsets (lines not visible here) are added afterwards. */
3038     switch (generic_state->kernel_mode) {
3039     case INTEL_ENC_KERNEL_NORMAL : {
3040         kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3043     case INTEL_ENC_KERNEL_PERFORMANCE : {
3044         kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3047     case INTEL_ENC_KERNEL_QUALITY : {
3048         kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
3056     if (generic_state->frame_type == SLICE_TYPE_P) {
3058     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3062     gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
3063     gpe->context_init(ctx, gpe_context);
3065     memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
3067     curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
3068     curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
3069     curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
3070     curbe_mbenc_param.brc_enabled = brc_enabled;
3071     curbe_mbenc_param.roi_enabled = roi_enable;
3073     /* set curbe mbenc*/
3074     generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
3076     // gen95 set curbe out of the brc. gen9 do it here
3077     avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
3078     /*begin brc frame update*/
     /* From here on, gpe_context is repointed at the BRC kernel; the
      * MBEnc context is carried in curbe_brc_param for surface binding. */
3079     memset(&curbe_brc_param, 0, sizeof(struct brc_param));
3080     curbe_brc_param.gpe_context_mbenc = gpe_context;
3081     media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
3082     kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
3083     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3084     curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
3086     gpe->context_init(ctx, gpe_context);
3087     gpe->reset_binding_table(ctx, gpe_context);
3088     /*brc copy ignored*/
3090     /* set curbe frame update*/
3091     generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3093     /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
3094     if (avc_state->multi_pre_enable) {
3095         gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
3097         gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
3099     /* image state construct*/
3100     if (IS_GEN8(i965->intel.device_info)) {
3101         gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3103         gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3105     /* set surface frame mbenc*/
3106     generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3109     gpe->setup_interface_data(ctx, gpe_context);
     /* Single MEDIA_OBJECT, empty inline data. */
3111     memset(&media_object_param, 0, sizeof(media_object_param));
3112     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
3113     media_object_param.pinline_data = &media_object_inline_data;
3114     media_object_param.inline_size = sizeof(media_object_inline_data);
3116     gen9_avc_run_kernel_media_object(ctx, encoder_context,
3119                                      &media_object_param);
3121     return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE (constant buffer) for the BRC MB-level update kernel.
 *
 * Zeroes the curbe, records the current frame type, and flags whether
 * BRC ROI processing is enabled for this frame.
 *
 * NOTE(review): this chunk is elided — the return-type line, the NULL
 * check after mapping the curbe, the `else` keyword, and the closing
 * brace sit on lines not visible here.
 */
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126 struct encode_state *encode_state,
3127 struct i965_gpe_context *gpe_context,
3128 struct intel_encoder_context *encoder_context,
3131 gen9_avc_mb_brc_curbe_data *cmd;
3132 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* Map the curbe buffer so the CPU can write the kernel constants. */
3135 cmd = i965_gpe_context_map_curbe(gpe_context);
3140 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3142 cmd->dw0.cur_frame_type = generic_state->frame_type;
3143 if (generic_state->brc_roi_enable) {
3144 cmd->dw0.enable_roi = 1;
/* NOTE(review): the `else` for this assignment is on an elided line. */
3146 cmd->dw0.enable_roi = 0;
/* Unmap; the GPU reads the curbe when the kernel is dispatched. */
3149 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces consumed by the BRC MB-level update kernel into the
 * GPE binding table: BRC history, optional per-MB QP data, optional ROI
 * map, and the MB status buffer.
 *
 * NOTE(review): chunk is elided — several argument lines of the
 * i965_add_buffer_*_gpe_surface() calls and the closing braces of the
 * two `if` bodies are on lines not visible here.
 */
3155 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3156 struct encode_state *encode_state,
3157 struct i965_gpe_context *gpe_context,
3158 struct intel_encoder_context *encoder_context,
3161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3162 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3163 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3165 /* BRC history buffer: persistent rate-control state across frames. */
3166 i965_add_buffer_gpe_surface(ctx,
3168 &avc_ctx->res_brc_history_buffer,
3170 avc_ctx->res_brc_history_buffer.size,
3172 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3174 /* Per-MB QP output surface (res_mbbrc_mb_qp_data_surface); only bound
 * when MB-level BRC is active. */
3175 if (generic_state->mb_brc_enabled) {
3176 i965_add_buffer_2d_gpe_surface(ctx,
3178 &avc_ctx->res_mbbrc_mb_qp_data_surface,
3180 I965_SURFACEFORMAT_R8_UNORM,
3181 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3185 /* ROI map for BRC-driven region-of-interest QP adjustment. */
3186 if (generic_state->brc_roi_enable) {
3187 i965_add_buffer_gpe_surface(ctx,
3189 &avc_ctx->res_mbbrc_roi_surface,
3191 avc_ctx->res_mbbrc_roi_surface.size,
3193 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3197 /* Per-MB statistics produced by earlier kernels/PAK. */
3198 i965_add_buffer_gpe_surface(ctx,
3200 &avc_ctx->res_mb_status_buffer,
3202 avc_ctx->res_mb_status_buffer.size,
3204 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/*
 * Dispatch the BRC MB-level update kernel: pick the GPE context, program
 * curbe and surfaces through the per-generation function pointers, then
 * launch a media-object walker over the frame.
 *
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): chunk is elided — the return-type line, `kernel_idx`
 * declaration, two walker arguments, and the closing brace are on lines
 * not visible here.
 */
3210 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3211 struct encode_state *encode_state,
3212 struct intel_encoder_context *encoder_context)
3215 struct i965_driver_data *i965 = i965_driver_data(ctx);
3216 struct i965_gpe_table *gpe = &i965->gpe_table;
3217 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3218 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3219 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3220 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3222 struct i965_gpe_context *gpe_context;
3223 struct gpe_media_object_walker_parameter media_object_walker_param;
3224 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3225 int media_function = 0;
3228 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3229 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3230 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
/* Reset kernel state and binding table before (re)programming them. */
3232 gpe->context_init(ctx, gpe_context);
3233 gpe->reset_binding_table(ctx, gpe_context);
3235 /* Program the curbe via the per-generation hook (no extra params). */
3236 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3239 /* Bind the kernel's input/output surfaces. */
3240 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3243 gpe->setup_interface_data(ctx, gpe_context);
3245 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3246 /* The walker resolution is halved: each thread covers a 2x2 MB
 * region, i.e. scaling is at 8x8 block level. */
3247 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3248 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3249 kernel_walker_param.no_dependency = 1;
3251 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3253 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3256 &media_object_walker_param);
3258 return VA_STATUS_SUCCESS;
3262 mbenc kernel related functions, including the intra dist kernel
/*
 * Derive the VME bi-prediction weight from the list0 distance scale
 * factor. Only a fixed set of weights (16, 21, 32, 43, 48) is supported
 * by the VME hardware; anything else falls back to the default 32.
 *
 * NOTE(review): chunk is elided — the branch structure between the
 * weighted_bipredidc test and the shift is partly missing (presumably
 * the computed weight applies to the implicit-weighting case — verify
 * against the full source), as are the closing braces and the final
 * `return biweight;`.
 */
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3267 int biweight = 32; // default value
3269 /* based on kernel HLD*/
3270 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
/* dist_scale_factor is in 1/256 units; >>2 (with rounding) maps it to
 * the 0..63 weight range used by VME. */
3273 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3275 if (biweight != 16 && biweight != 21 &&
3276 biweight != 32 && biweight != 43 && biweight != 48) {
3277 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
/*
 * Compute dist_scale_factor_list0[] for B-frame temporal weighting,
 * following the H.264 DistScaleFactor derivation: tx = (16384 + |td/2|)
 * / td, DistScaleFactor = clip(-1024, 1023, (tb * tx + 32) >> 6), with
 * tb/td taken from POC distances clipped to [-128, 127].
 *
 * NOTE(review): chunk is elided — declarations of i/tb/td/tx/tmp/
 * poc0/poc1, the loop-exit statements after the validity checks, the
 * tb/td assignments, and the closing braces are on lines not visible
 * here.
 */
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286 struct encode_state *encode_state,
3287 struct intel_encoder_context *encoder_context)
3289 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3294 int max_num_references;
3295 VAPictureH264 *curr_pic;
3296 VAPictureH264 *ref_pic_l0;
3297 VAPictureH264 *ref_pic_l1;
3306 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
/* Clear all 32 entries before filling the active ones. */
3308 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309 curr_pic = &pic_param->CurrPic;
3310 for (i = 0; i < max_num_references; i++) {
3311 ref_pic_l0 = &(slice_param->RefPicList0[i]);
/* Skip entries whose list0 reference is invalid. */
3313 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3316 ref_pic_l1 = &(slice_param->RefPicList1[0]);
/* NOTE(review): this second check re-tests ref_pic_l0; it looks like a
 * copy-paste slip and presumably should validate ref_pic_l1, which was
 * just fetched — verify against the H.264 temporal-direct derivation
 * before changing. */
3317 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3318 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
/* tb ~ poc0: current-to-list0 distance; td ~ poc1: list1-to-list0. */
3321 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323 CLIP(poc0, -128, 127);
3324 CLIP(poc1, -128, 127);
/* tmp = |td / 2| (rounding term for the tx division). */
3331 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332 tx = (16384 + tmp) / td ;
3333 tmp = (tb * tx + 32) >> 6;
3334 CLIP(tmp, -1024, 1023);
3335 avc_state->dist_scale_factor_list0[i] = tmp;
/*
 * Look up the QP that was used to encode a reference picture, by
 * resolving the surface from RefPicList0/RefPicList1 and reading the
 * qp_value cached in its private AVC surface data.
 *
 * NOTE(review): chunk is elided — the list-selector parameter, the
 * else/goto paths for an out-of-range ref_frame_idx, and the fallback
 * return when the surface has no private data are on lines not visible
 * here.
 */
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342 VAEncSliceParameterBufferH264 *slice_param,
3346 struct i965_driver_data *i965 = i965_driver_data(ctx);
3347 struct object_surface *obj_surface;
3348 struct gen9_surface_avc *avc_priv_surface;
3349 VASurfaceID surface_id;
3351 assert(slice_param);
/* Pick the surface from list0 when the index is within its active range. */
3355 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
/* Otherwise try list1 under the same bound check. */
3360 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3365 obj_surface = SURFACE(surface_id);
/* The per-surface QP is only available when the private data exists. */
3366 if (obj_surface && obj_surface->private_data) {
3367 avc_priv_surface = obj_surface->private_data;
3368 return avc_priv_surface->qp_value;
/*
 * Populate the MB-BRC constant data buffer: copy the per-slice-type base
 * table (16 dwords x 52 QP levels), then patch individual dwords per QP
 * according to the enabled encoder features (old intra mode cost, P-skip
 * MV cost adjustment, non-FTQ/FTQ skip thresholds, intra scaling factor,
 * trellis lambda values).
 *
 * NOTE(review): chunk is elided — the case labels of the switch, the
 * `data += 16` advance at the end of each per-QP iteration (data_tmp is
 * declared but its use is not visible), else branches, and closing
 * braces are on lines not visible here.
 */
3375 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
3376 struct encode_state *encode_state,
3377 struct intel_encoder_context *encoder_context)
3379 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3380 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3381 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3382 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3384 struct i965_gpe_resource *gpe_resource = NULL;
3385 unsigned int * data = NULL;
3386 unsigned int * data_tmp = NULL;
/* 16 dwords of constants per QP, 52 QP levels (0..51). */
3387 unsigned int size = 16 * 52;
3388 unsigned int table_idx = 0;
3389 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
3390 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
3393 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3394 assert(gpe_resource);
3395 data = i965_map_gpe_resource(gpe_resource);
/* Base table is selected by slice type (I/P/B). */
3398 table_idx = slice_type_kernel[generic_state->frame_type];
3400 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3404 switch (generic_state->frame_type) {
/* I slice: optionally swap in the legacy intra mode cost (dword 0). */
3406 for (i = 0; i < AVC_QP_MAX ; i++) {
3407 if (avc_state->old_mode_cost_enable)
3408 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* P/B slices: patch skip cost/threshold and intra scaling dwords. */
3414 for (i = 0; i < AVC_QP_MAX ; i++) {
3415 if (generic_state->frame_type == SLICE_TYPE_P) {
3416 if (avc_state->skip_bias_adjustment_enable)
3417 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
/* dword 9: non-FTQ skip threshold — app-supplied LUT wins over the
 * built-in per-slice-type tables. */
3419 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3420 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3421 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3422 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3424 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
/* dword 10: intra cost scaling factor (adaptive or static table). */
3427 if (avc_state->adaptive_intra_scaling_enable) {
3428 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3430 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* Common pass over all QPs: FTQ skip thresholds (dwords 6/7, the
 * threshold byte replicated into packed lanes) and trellis lambdas
 * (dwords 11/12). */
3442 for (i = 0; i < AVC_QP_MAX ; i++) {
3443 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3444 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3445 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3446 (avc_state->ftq_skip_threshold_lut[i] << 24));
3447 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3448 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3449 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3450 (avc_state->ftq_skip_threshold_lut[i] << 24));
3453 if (avc_state->kernel_trellis_enable) {
3454 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3455 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3461 i965_unmap_gpe_resource(gpe_resource);
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466 struct encode_state *encode_state,
3467 struct i965_gpe_context *gpe_context,
3468 struct intel_encoder_context *encoder_context,
3471 struct i965_driver_data *i965 = i965_driver_data(ctx);
3473 gen9_avc_mbenc_curbe_data *g9;
3474 gen95_avc_mbenc_curbe_data *g95;
3476 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3480 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3482 VASurfaceID surface_id;
3483 struct object_surface *obj_surface;
3485 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486 unsigned char qp = 0;
3487 unsigned char me_method = 0;
3488 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489 unsigned int table_idx = 0;
3490 unsigned char is_g9 = 0;
3491 unsigned char is_g95 = 0;
3492 unsigned int curbe_size = 0;
3494 unsigned int preset = generic_state->preset;
3495 if (IS_SKL(i965->intel.device_info) ||
3496 IS_BXT(i965->intel.device_info)) {
3497 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3501 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502 memset(cmd.g9, 0, curbe_size);
3504 if (mbenc_i_frame_dist_in_use) {
3505 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3508 switch (generic_state->frame_type) {
3510 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3513 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3516 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3523 } else if (IS_KBL(i965->intel.device_info) ||
3524 IS_GEN10(i965->intel.device_info) ||
3525 IS_GLK(i965->intel.device_info)) {
3526 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3530 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531 memset(cmd.g9, 0, curbe_size);
3533 if (mbenc_i_frame_dist_in_use) {
3534 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3537 switch (generic_state->frame_type) {
3539 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3542 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3545 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3553 /* Never get here, just silence a gcc warning */
3559 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3562 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3567 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568 cmd.g9->dw38.max_len_sp = 0;
3571 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3573 cmd.g9->dw3.src_access = 0;
3574 cmd.g9->dw3.ref_access = 0;
3576 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577 //disable ftq_override by now.
3578 if (avc_state->ftq_override) {
3579 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3582 // both gen9 and gen95 come here by now
3583 if (generic_state->frame_type == SLICE_TYPE_P) {
3584 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3587 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3591 cmd.g9->dw3.ftq_enable = 0;
3594 if (avc_state->disable_sub_mb_partion)
3595 cmd.g9->dw3.sub_mb_part_mask = 0x7;
3597 if (mbenc_i_frame_dist_in_use) {
3598 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601 cmd.g9->dw6.batch_buffer_end = 0;
3602 cmd.g9->dw31.intra_compute_type = 1;
3605 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3610 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612 } else if (avc_state->skip_bias_adjustment_enable) {
3613 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614 // No need to check for P picture as the flag is only enabled for P picture */
3615 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3620 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3623 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626 cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3633 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634 cmd.g9->dw7.src_field_polarity = 0;//field related
3636 /*ftq_skip_threshold_lut set,dw14 /15*/
3638 /*r5 disable NonFTQSkipThresholdLUT*/
3639 if (generic_state->frame_type == SLICE_TYPE_P) {
3640 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3642 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3647 cmd.g9->dw13.qp_prime_y = qp;
3648 cmd.g9->dw13.qp_prime_cb = qp;
3649 cmd.g9->dw13.qp_prime_cr = qp;
3650 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3652 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653 switch (gen9_avc_multi_pred[preset]) {
3655 cmd.g9->dw32.mult_pred_l0_disable = 128;
3656 cmd.g9->dw32.mult_pred_l1_disable = 128;
3659 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660 cmd.g9->dw32.mult_pred_l1_disable = 128;
3663 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3667 cmd.g9->dw32.mult_pred_l0_disable = 1;
3668 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3674 cmd.g9->dw32.mult_pred_l0_disable = 128;
3675 cmd.g9->dw32.mult_pred_l1_disable = 128;
3678 /*field setting for dw33 34, ignored*/
3680 if (avc_state->adaptive_transform_decision_enable) {
3681 if (generic_state->frame_type != SLICE_TYPE_I) {
3683 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684 cmd.g9->dw58.mb_texture_threshold = 1024;
3685 cmd.g9->dw58.tx_decision_threshold = 128;
3686 } else if (is_g95) {
3687 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688 cmd.g9->dw58.mb_texture_threshold = 1024;
3689 cmd.g9->dw58.tx_decision_threshold = 128;
3695 if (generic_state->frame_type == SLICE_TYPE_B) {
3696 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3701 cmd.g9->dw34.b_original_bff = 0; //frame only
3702 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3708 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3713 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3715 if (cmd.g9->dw34.force_non_skip_check) {
3716 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3721 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722 cmd.g9->dw38.ref_threshold = 400;
3723 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3725 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3726 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3730 if (mbenc_i_frame_dist_in_use) {
3731 cmd.g9->dw13.qp_prime_y = 0;
3732 cmd.g9->dw13.qp_prime_cb = 0;
3733 cmd.g9->dw13.qp_prime_cr = 0;
3734 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3739 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3752 table_idx = slice_type_kernel[generic_state->frame_type];
3753 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3755 if (generic_state->frame_type == SLICE_TYPE_I) {
3756 cmd.g9->dw0.skip_mode_enable = 0;
3757 cmd.g9->dw37.skip_mode_enable = 0;
3758 cmd.g9->dw36.hme_combine_overlap = 0;
3759 cmd.g9->dw47.intra_cost_sf = 16;
3760 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3762 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3764 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766 cmd.g9->dw3.bme_disable_fbr = 1;
3767 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769 cmd.g9->dw7.non_skip_zmv_added = 1;
3770 cmd.g9->dw7.non_skip_mode_added = 1;
3771 cmd.g9->dw7.skip_center_mask = 1;
3772 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774 cmd.g9->dw36.hme_combine_overlap = 1;
3775 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3784 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786 cmd.g9->dw3.search_ctrl = 7;
3787 cmd.g9->dw3.skip_type = 1;
3788 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790 cmd.g9->dw7.skip_center_mask = 0xff;
3791 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793 cmd.g9->dw36.hme_combine_overlap = 1;
3794 surface_id = slice_param->RefPicList1[0].picture_id;
3795 obj_surface = SURFACE(surface_id);
3797 WARN_ONCE("Invalid backward reference frame\n");
3800 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3802 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3815 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817 cmd.g9->dw7.non_skip_zmv_added = 1;
3818 cmd.g9->dw7.non_skip_mode_added = 1;
3821 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3827 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3829 if (avc_state->rolling_intra_refresh_enable) {
3830 /*by now disable it*/
3831 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832 cmd.g9->dw32.mult_pred_l0_disable = 128;
3833 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834 across one P frame to another P frame, as needed by the RollingI algo */
3836 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3842 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843 cmd.g95->dw4.enable_intra_refresh = 0;
3844 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3848 cmd.g95->dw4.enable_intra_refresh = 1;
3849 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3858 cmd.g9->dw34.widi_intra_refresh_en = 0;
3861 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3863 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3865 cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3867 /*roi set disable by now. 49-56*/
3868 if (curbe_param->roi_enabled) {
3869 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3870 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3871 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3872 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3874 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3875 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3876 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3877 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3879 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3880 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3881 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3882 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3884 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3885 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3886 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3887 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3889 if (!generic_state->brc_enabled) {
3891 tmp = generic_state->roi[0].value;
3892 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894 tmp = generic_state->roi[1].value;
3895 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897 tmp = generic_state->roi[2].value;
3898 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900 tmp = generic_state->roi[3].value;
3901 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3904 cmd.g9->dw34.roi_enable_flag = 0;
3909 if (avc_state->tq_enable) {
3910 if (generic_state->frame_type == SLICE_TYPE_I) {
3911 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3914 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3919 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3923 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3926 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3929 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3932 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3938 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3963 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3987 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_fei_set_curbe_mbenc:
 * Fill the MBEnc kernel CURBE for the FEI (Flexible Encoding
 * Infrastructure) AVC encode path.
 *
 * Seeds the CURBE with per-slice-type init data, derives the
 * motion-estimation search parameters from the application-supplied
 * VAEncMiscParameterFEIFrameControlH264 (search_window / ref_width /
 * ref_height / len_sp), programs QP and mode/MV cost tables, and
 * finally writes the binding-table surface indices the kernel expects
 * (dw80..dw102).  BRC is not used on the FEI path, so QP is taken
 * directly from the picture/slice parameters (CQP).
 */
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994 struct encode_state *encode_state,
3995 struct i965_gpe_context *gpe_context,
3996 struct intel_encoder_context *encoder_context,
3999 struct i965_driver_data *i965 = i965_driver_data(ctx);
4000 gen9_avc_fei_mbenc_curbe_data *cmd;
4001 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004 VASurfaceID surface_id;
4005 struct object_surface *obj_surface;
4006 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
4008 VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4010 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011 unsigned char qp = 0;
4012 unsigned char me_method = 0;
4013 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014 unsigned int table_idx = 0;
4015 int ref_width, ref_height, len_sp;
4016 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018 unsigned int preset = generic_state->preset;
/* CQP only on the FEI path: frame QP = pic_init_qp + slice delta */
4020 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4022 assert(gpe_context != NULL);
4023 cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024 memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* Seed the CURBE with the per-slice-type (or I-frame-distortion) init data */
4026 if (mbenc_i_frame_dist_in_use) {
4027 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4030 switch (generic_state->frame_type) {
4032 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4035 memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4038 memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4045 /* 4 means full search, 6 means diamond search */
4046 me_method = (fei_param->search_window == 5) ||
4047 (fei_param->search_window == 8) ? 4 : 6;
4049 ref_width = fei_param->ref_width;
4050 ref_height = fei_param->ref_height;
4051 len_sp = fei_param->len_sp;
4052 /* If there is a search_window, discard user provided ref_width, ref_height
4053 * and search_path length */
4054 switch (fei_param->search_window) {
4056 /* not use predefined search window, there should be a search_path input */
4057 if ((fei_param->search_path != 0) &&
4058 (fei_param->search_path != 1) &&
4059 (fei_param->search_path != 2)) {
4060 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
4063 /* 4 means full search, 6 means diamond search */
4064 me_method = (fei_param->search_path == 1) ? 6 : 4;
4065 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066 WARN_ONCE("Invalid input ref_width/ref_height in"
4067 "SearchWindow=0 case! \n");
4073 /* Tiny - 4 SUs 24x24 window */
4080 /* Small - 9 SUs 28x28 window */
4086 /* Diamond - 16 SUs 48x40 window */
4092 /* Large Diamond - 32 SUs 48x40 window */
4098 /* Exhaustive - 48 SUs 48x40 window */
4104 /* Diamond - 16 SUs 64x32 window */
4110 /* Large Diamond - 32 SUs 64x32 window */
4116 /* Exhaustive - 48 SUs 64x32 window */
4126 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
/* B frames search both directions, so the horizontal window is halved */
4128 CLIP(ref_width, 4, 32);
4129 CLIP(ref_height, 4, 32);
4130 } else if (is_pframe) {
4131 CLIP(ref_width, 4, 64);
4132 CLIP(ref_height, 4, 32);
4135 cmd->dw0.adaptive_enable =
4136 cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137 cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138 = avc_state->transform_8x8_mode_enable;
4139 cmd->dw2.max_len_sp = len_sp;
4140 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142 cmd->dw3.src_access =
4143 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is supported
/* FTQ (fast transform/quantization based skip) applies to inter frames only */
4145 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146 if (avc_state->ftq_override) {
4147 cmd->dw3.ft_enable = avc_state->ftq_enable;
4149 if (generic_state->frame_type == SLICE_TYPE_P) {
4150 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4152 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4156 cmd->dw3.ft_enable = 0;
4159 if (avc_state->disable_sub_mb_partion)
4160 cmd->dw3.sub_mb_part_mask = 0x7;
4162 if (mbenc_i_frame_dist_in_use) {
4163 /* Fixme: Not supported, no brc in fei */
/* I-frame distortion mode works on the 4x downscaled picture */
4165 cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166 cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168 cmd->dw6.batch_buffer_end = 0;
4169 cmd->dw31.intra_compute_type = 1;
4172 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173 cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175 generic_state->frame_height_in_mbs : avc_state->slice_height;
/* SAD/partition controls come straight from the FEI frame control */
4176 cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177 cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178 cmd->dw3.inter_sad = fei_param->inter_sad;
4179 cmd->dw3.Intra_sad = fei_param->intra_sad;
4180 cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181 cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182 cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183 avc_state->sfd_enable && generic_state->hme_enabled;
4184 cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185 cmd->dw4.constrained_intra_pred_flag =
4186 pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187 cmd->dw4.hme_enable = 0;
4188 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189 cmd->dw4.use_actual_ref_qp_value =
4190 generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191 cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192 cmd->dw7.src_field_polarity = 0;
/* mode/MV cost table, indexed by kernel slice type and frame QP */
4195 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197 // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198 } else if (avc_state->skip_bias_adjustment_enable) {
4199 // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200 // No need to check for P picture as the flag is only enabled for P picture
4201 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4205 /* search path tables */
4206 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4209 //ftq_skip_threshold_lut set,dw14 /15
4211 //r5 disable NonFTQSkipThresholdLUT
4212 if (generic_state->frame_type == SLICE_TYPE_P) {
4213 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215 cmd->dw32.skip_val =
4216 gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4218 cmd->dw13.qp_prime_y = qp;
4219 cmd->dw13.qp_prime_cb = qp;
4220 cmd->dw13.qp_prime_cr = qp;
4221 cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
/* 0x80 in the *_disable fields tells the kernel multi-pred is off */
4223 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224 cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225 cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4228 cmd->dw32.mult_pred_l0_disable = 0x80;
4229 cmd->dw32.mult_pred_l1_disable = 0x80;
4231 /* no field pic setting, not supported */
4234 if (avc_state->adaptive_transform_decision_enable) {
4235 if (generic_state->frame_type != SLICE_TYPE_I) {
4236 cmd->dw34.enable_adaptive_tx_decision = 1;
4239 cmd->dw58.mb_texture_threshold = 1024;
4240 cmd->dw58.tx_decision_threshold = 128;
4242 if (generic_state->frame_type == SLICE_TYPE_B) {
4243 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244 cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4247 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248 cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4254 if (cmd->dw34.force_non_skip_check) {
4255 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4257 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258 cmd->dw38.ref_threshold = 400;
4259 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260 // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
4261 // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262 // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
/* I-frame distortion mode: neutral QP and no intra non-DC penalties */
4264 if (mbenc_i_frame_dist_in_use) {
4265 cmd->dw13.qp_prime_y = 0;
4266 cmd->dw13.qp_prime_cb = 0;
4267 cmd->dw13.qp_prime_cr = 0;
4268 cmd->dw33.intra_16x16_nondc_penalty = 0;
4269 cmd->dw33.intra_8x8_nondc_penalty = 0;
4270 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* per-reference actual QP values, used by the multi-ref QP check */
4272 if (cmd->dw4.use_actual_ref_qp_value) {
4273 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4285 table_idx = slice_type_kernel[generic_state->frame_type];
4286 cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* Slice-type specific CURBE fields: I, then P, then B */
4288 if (generic_state->frame_type == SLICE_TYPE_I) {
4289 cmd->dw0.skip_mode_enable = 0;
4290 cmd->dw37.skip_mode_enable = 0;
4291 cmd->dw36.hme_combine_overlap = 0;
4292 cmd->dw36.check_all_fractional_enable = 0;
4293 cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294 cmd->dw34.enable_direct_bias_adjustment = 0;
4295 cmd->dw34.enable_global_motion_bias_adjustment = 0;
4297 } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299 cmd->dw3.bme_disable_fbr = 1;
4300 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302 cmd->dw7.non_skip_zmv_added = 1;
4303 cmd->dw7.non_skip_mode_added = 1;
4304 cmd->dw7.skip_center_mask = 1;
4306 cmd->dw47.intra_cost_sf =
4307 (avc_state->adaptive_intra_scaling_enable) ?
4308 gen9_avc_adaptive_intra_scaling_factor[preset] :
4309 gen9_avc_intra_scaling_factor[preset];
4311 cmd->dw47.max_vmv_r =
4312 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4314 cmd->dw36.hme_combine_overlap = 1;
4315 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317 cmd->dw34.enable_direct_bias_adjustment = 0;
4318 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319 if (avc_state->global_motion_bias_adjustment_enable)
4320 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4322 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4324 } else { /* B slice */
4326 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327 cmd->dw1.bi_Weight = avc_state->bi_weight;
4328 cmd->dw3.search_ctrl = 7;
4329 cmd->dw3.skip_type = 1;
4330 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332 cmd->dw7.skip_center_mask = 0xff;
4334 cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335 gen9_avc_adaptive_intra_scaling_factor[qp] :
4336 gen9_avc_intra_scaling_factor[qp];
4338 cmd->dw47.max_vmv_r =
4339 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4341 cmd->dw36.hme_combine_overlap = 1;
4343 //check is_fwd_frame_short_term_ref
4344 surface_id = slice_param->RefPicList1[0].picture_id;
4345 obj_surface = SURFACE(surface_id);
/* bail out early on an invalid L1 reference; curbe must still be unmapped */
4347 WARN_ONCE("Invalid backward reference frame\n");
4349 i965_gpe_context_unmap_curbe(gpe_context);
4352 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4354 cmd->dw36.num_ref_idx_l0_minus_one =
4355 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4357 cmd->dw36.num_ref_idx_l1_minus_one =
4358 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4360 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
/* temporal distance scale factors for L0 references (B direct mode) */
4362 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4371 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372 if (cmd->dw34.enable_direct_bias_adjustment) {
4373 cmd->dw7.non_skip_mode_added = 1;
4374 cmd->dw7.non_skip_zmv_added = 1;
4377 cmd->dw34.enable_global_motion_bias_adjustment =
4378 avc_state->global_motion_bias_adjustment_enable;
4379 if (avc_state->global_motion_bias_adjustment_enable)
4380 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4382 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383 cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4386 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4388 if (avc_state->rolling_intra_refresh_enable) {
4390 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4393 cmd->dw34.widi_intra_refresh_en = 0;
4395 cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396 cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4398 /* Fixme: Skipped ROI stuffs for now */
4400 /* r64: FEI specific fields */
4401 cmd->dw64.fei_enable = 1;
4402 cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403 if (fei_param->distortion != VA_INVALID_ID)
4404 cmd->dw64.vme_distortion_output_enable = 1;
4405 cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406 cmd->dw64.mb_input_enable = fei_param->mb_input;
4408 // FEI mode is disabled when external MVP is available
4409 if (fei_param->mv_predictor_enable)
4410 cmd->dw64.fei_mode = 0;
4412 cmd->dw64.fei_mode = 1;
/* Binding-table indices: must match the surface setup in
 * gen9_avc_fei_send_surface_mbenc */
4414 cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415 cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416 cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417 cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418 cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419 cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420 cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421 cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422 cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423 cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424 cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425 cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426 cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427 cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428 cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429 cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430 cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431 cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432 cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433 cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434 cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435 cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436 cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_mbenc:
 * Bind every input/output surface needed by the (non-FEI) MBEnc kernel
 * to the GPE context's binding table: PAK object and MV data outputs,
 * source YUV (full size, or 4x downscaled when the I-frame-distortion
 * kernel is used), HME MV/distortion inputs, MBBRC constant data, MB QP
 * data, VME reference pictures for L0/L1, MB stats / flatness check,
 * MAD, slice map, BRC curbe (gen9.5) and SFD cost tables.  The binding
 * table indices used here must match the *_INDEX values written into
 * the CURBE by the corresponding set_curbe routine.
 */
4443 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
4444 struct encode_state *encode_state,
4445 struct i965_gpe_context *gpe_context,
4446 struct intel_encoder_context *encoder_context,
4449 struct i965_driver_data *i965 = i965_driver_data(ctx);
4450 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4451 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4452 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4453 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4454 struct object_surface *obj_surface;
4455 struct gen9_surface_avc *avc_priv_surface;
4456 struct i965_gpe_resource *gpe_resource;
4457 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
4458 VASurfaceID surface_id;
4459 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
4460 unsigned int size = 0;
4461 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
4462 generic_state->frame_height_in_mbs;
4464 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4465 unsigned char is_g95 = 0;
/* gen9.5 (KBL/GLK/GEN10) uses different binding-table indices for a
 * few surfaces than gen9 (SKL/BXT) */
4467 if (IS_SKL(i965->intel.device_info) ||
4468 IS_BXT(i965->intel.device_info))
4470 else if (IS_KBL(i965->intel.device_info) ||
4471 IS_GEN10(i965->intel.device_info) ||
4472 IS_GLK(i965->intel.device_info))
4475 obj_surface = encode_state->reconstructed_object;
4477 if (!obj_surface || !obj_surface->private_data)
4479 avc_priv_surface = obj_surface->private_data;
4481 /*pak obj command buffer output*/
4482 size = frame_mb_size * 16 * 4;
4483 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4484 i965_add_buffer_gpe_surface(ctx,
4490 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4492 /*mv data buffer output*/
4493 size = frame_mb_size * 32 * 4;
4494 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4495 i965_add_buffer_gpe_surface(ctx,
4501 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4503 /*input current YUV surface, current input Y/UV object*/
/* I-frame distortion mode runs on the 4x scaled surface instead */
4504 if (mbenc_i_frame_dist_in_use) {
4505 obj_surface = encode_state->reconstructed_object;
4506 if (!obj_surface || !obj_surface->private_data)
4508 avc_priv_surface = obj_surface->private_data;
4509 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4511 obj_surface = encode_state->input_yuv_object;
4513 i965_add_2d_gpe_surface(ctx,
4518 I965_SURFACEFORMAT_R8_UNORM,
4519 GEN9_AVC_MBENC_CURR_Y_INDEX);
4521 i965_add_2d_gpe_surface(ctx,
4526 I965_SURFACEFORMAT_R16_UINT,
4527 GEN9_AVC_MBENC_CURR_UV_INDEX);
/* HME results: 4x MV data and distortion surfaces.
 * Gen8 only consumes them for inter frames. */
4529 if (generic_state->hme_enabled) {
4531 if (!IS_GEN8(i965->intel.device_info)) {
4532 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4533 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4536 I965_SURFACEFORMAT_R8_UNORM,
4537 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4538 /* memv distortion input*/
4539 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4540 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4543 I965_SURFACEFORMAT_R8_UNORM,
4544 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4545 } else if (generic_state->frame_type != SLICE_TYPE_I) {
4546 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4547 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4550 I965_SURFACEFORMAT_R8_UNORM,
4551 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4552 /* memv distortion input*/
4553 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4554 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4557 I965_SURFACEFORMAT_R8_UNORM,
4558 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4562 /*mbbrc const data_buffer*/
4563 if (param->mb_const_data_buffer_in_use) {
4564 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
4565 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
4566 i965_add_buffer_gpe_surface(ctx,
4572 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
4576 /*mb qp data_buffer*/
/* app-provided MB QP surface takes precedence over the BRC one */
4577 if (param->mb_qp_buffer_in_use) {
4578 if (avc_state->mb_qp_data_enable)
4579 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
4581 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
4582 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4585 I965_SURFACEFORMAT_R8_UNORM,
4586 GEN9_AVC_MBENC_MBQP_INDEX);
4589 /*input current YUV surface, current input Y/UV object*/
4590 if (mbenc_i_frame_dist_in_use) {
4591 obj_surface = encode_state->reconstructed_object;
4592 if (!obj_surface || !obj_surface->private_data)
4594 avc_priv_surface = obj_surface->private_data;
4595 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4597 obj_surface = encode_state->input_yuv_object;
/* VME current picture + interleaved L0 references (odd indices) */
4599 i965_add_adv_gpe_surface(ctx, gpe_context,
4601 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
4602 /*input ref YUV surface*/
4603 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4604 surface_id = slice_param->RefPicList0[i].picture_id;
4605 obj_surface = SURFACE(surface_id);
4606 if (!obj_surface || !obj_surface->private_data)
4609 i965_add_adv_gpe_surface(ctx, gpe_context,
4611 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
4613 /*input current YUV surface, current input Y/UV object*/
4614 if (mbenc_i_frame_dist_in_use) {
4615 obj_surface = encode_state->reconstructed_object;
4616 if (!obj_surface || !obj_surface->private_data)
4618 avc_priv_surface = obj_surface->private_data;
4619 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4621 obj_surface = encode_state->input_yuv_object;
4623 i965_add_adv_gpe_surface(ctx, gpe_context,
4625 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
/* L1 references and the forward MB code/MV from the first L1 ref
 * (needed for B-frame direct/skip) */
4627 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4628 if (i > 0) break; // only one ref supported here for B frame
4629 surface_id = slice_param->RefPicList1[i].picture_id;
4630 obj_surface = SURFACE(surface_id);
4631 if (!obj_surface || !obj_surface->private_data)
4634 i965_add_adv_gpe_surface(ctx, gpe_context,
4636 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
4637 i965_add_adv_gpe_surface(ctx, gpe_context,
4639 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
4641 avc_priv_surface = obj_surface->private_data;
4642 /*pak obj command buffer output(mb code)*/
4643 size = frame_mb_size * 16 * 4;
4644 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4645 i965_add_buffer_gpe_surface(ctx,
4651 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
4653 /*mv data buffer output*/
4654 size = frame_mb_size * 32 * 4;
4655 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4656 i965_add_buffer_gpe_surface(ctx,
4662 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
4666 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
4667 i965_add_adv_gpe_surface(ctx, gpe_context,
4669 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
4674 /* BRC distortion data buffer for I frame*/
4675 if (mbenc_i_frame_dist_in_use) {
4676 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
4677 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4680 I965_SURFACEFORMAT_R8_UNORM,
4681 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
4684 /* as ref frame, update later RefPicSelect of Current Picture */
4685 obj_surface = encode_state->reconstructed_object;
4686 avc_priv_surface = obj_surface->private_data;
4687 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
4688 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
4689 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4692 I965_SURFACEFORMAT_R8_UNORM,
4693 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
/* MB stats (gen9+) or flatness-check surface share the same BTI slot */
4696 if (!IS_GEN8(i965->intel.device_info)) {
4697 if (param->mb_vproc_stats_enable) {
4698 /*mb status buffer input*/
4699 size = frame_mb_size * 16 * 4;
4700 gpe_resource = &(avc_ctx->res_mb_status_buffer);
4701 i965_add_buffer_gpe_surface(ctx,
4707 GEN9_AVC_MBENC_MB_STATS_INDEX);
4709 } else if (avc_state->flatness_check_enable) {
4710 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4711 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4714 I965_SURFACEFORMAT_R8_UNORM,
4715 GEN9_AVC_MBENC_MB_STATS_INDEX);
4717 } else if (avc_state->flatness_check_enable) {
4718 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4719 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4722 I965_SURFACEFORMAT_R8_UNORM,
4723 GEN9_AVC_MBENC_MB_STATS_INDEX);
4726 if (param->mad_enable) {
4727 /*mad buffer input*/
4729 gpe_resource = &(avc_ctx->res_mad_data_buffer);
4730 i965_add_buffer_gpe_surface(ctx,
4736 GEN9_AVC_MBENC_MAD_DATA_INDEX);
/* MAD buffer must start from zero each frame */
4737 i965_zero_gpe_resource(gpe_resource);
4740 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
4741 if (avc_state->mbenc_brc_buffer_size > 0) {
4742 size = avc_state->mbenc_brc_buffer_size;
4743 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
4744 i965_add_buffer_gpe_surface(ctx,
4750 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
4753 /* arbitrary num mbs in slice */
4754 if (avc_state->arbitrary_num_mbs_in_slice) {
4755 /*slice surface input*/
4756 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
4757 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4760 I965_SURFACEFORMAT_R8_UNORM,
4761 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
4762 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
4765 /* BRC distortion data buffer for I frame */
4766 if (!mbenc_i_frame_dist_in_use) {
4767 if (avc_state->mb_disable_skip_map_enable) {
4768 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
4769 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4772 I965_SURFACEFORMAT_R8_UNORM,
4773 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
/* Static frame detection (SFD): gen8 uses an output buffer, gen9+
 * use per-frame-type cost tables */
4775 if (IS_GEN8(i965->intel.device_info)) {
4776 if (avc_state->sfd_enable) {
4777 size = 128 / sizeof(unsigned long);
4778 gpe_resource = &(avc_ctx->res_sfd_output_buffer);
4779 i965_add_buffer_gpe_surface(ctx,
4785 GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
4789 if (avc_state->sfd_enable && generic_state->hme_enabled) {
4790 if (generic_state->frame_type == SLICE_TYPE_P) {
4791 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
4792 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4793 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
4795 if (generic_state->frame_type != SLICE_TYPE_I) {
4797 i965_add_buffer_gpe_surface(ctx,
4803 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
/*
 * gen9_avc_fei_send_surface_mbenc - bind the FEI MBEnc kernel's surfaces.
 *
 * For each application-controlled FEI buffer (MB code, MV data, MB control,
 * MV predictors, VME distortion, per-MB QP) this either imports the user's
 * VA buffer object into a GPE resource (when a valid buffer ID is supplied
 * in fei_param) or allocates a driver-owned buffer, and then adds each
 * resource -- plus the current input Y/UV planes and the list-0/list-1
 * reference surfaces -- to the kernel's binding table at the
 * GEN9_AVC_MBENC_* indices.
 *
 * NOTE(review): this listing is partially elided -- the embedded original
 * line numbers jump (e.g. 4817 -> 4820), so several calls below are missing
 * argument lines, else-branches and closing braces.  Code is left
 * byte-identical; only comments were added.
 */
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815 struct encode_state *encode_state,
4816 struct i965_gpe_context *gpe_context,
4817 struct intel_encoder_context *encoder_context,
4820 struct i965_driver_data *i965 = i965_driver_data(ctx);
4821 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826 struct object_buffer *obj_buffer = NULL;
4827 struct buffer_store *buffer_store = NULL;
4828 struct object_surface *obj_surface = NULL;
4829 struct gen9_surface_avc *avc_priv_surface;
4830 struct i965_gpe_resource *gpe_resource;
4831 VASurfaceID surface_id;
4832 unsigned int size = 0;
4833 unsigned int frame_mb_nums;
4834 int i = 0, allocate_flag = 1;
/* The per-surface FEI resources live on the reconstructed surface's
 * private data; bail out if it is missing (return elided in listing). */
4836 obj_surface = encode_state->reconstructed_object;
4837 if (!obj_surface || !obj_surface->private_data)
4839 avc_priv_surface = obj_surface->private_data;
4841 frame_mb_nums = generic_state->frame_width_in_mbs *
4842 generic_state->frame_height_in_mbs;
4843 fei_param = avc_state->fei_framectl_param;
4845 assert(fei_param != NULL);
4847 /* res_mb_code_surface for MB code */
/* Import the user's MB-code buffer when provided; otherwise (elided else
 * branch) allocate a driver-owned, page-aligned buffer. */
4848 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4849 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851 if (fei_param->mb_code_data != VA_INVALID_ID) {
4852 obj_buffer = BUFFER(fei_param->mb_code_data);
4853 assert(obj_buffer != NULL);
4854 buffer_store = obj_buffer->buffer_store;
4855 assert(size <= buffer_store->bo->size);
4856 i965_dri_object_to_buffer_gpe_resource(
4857 &avc_priv_surface->res_mb_code_surface,
4860 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861 &avc_priv_surface->res_mb_code_surface,
4862 ALIGN(size, 0x1000),
4864 assert(allocate_flag != 0);
4867 /* res_mv_data_surface for MV data */
4868 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871 if (fei_param->mv_data != VA_INVALID_ID) {
4872 obj_buffer = BUFFER(fei_param->mv_data);
4873 assert(obj_buffer != NULL);
4874 buffer_store = obj_buffer->buffer_store;
4875 assert(size <= buffer_store->bo->size);
4876 i965_dri_object_to_buffer_gpe_resource(
4877 &avc_priv_surface->res_mv_data_surface,
4880 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881 &avc_priv_surface->res_mv_data_surface,
4882 ALIGN(size, 0x1000),
4884 assert(allocate_flag != 0);
4887 /* fei mb control data surface */
/* NOTE(review): bitwise '|' between the two flags is presumably intended
 * as logical or of booleans here -- confirm against upstream source. */
4888 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4889 if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890 assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891 obj_buffer = BUFFER(fei_param->mb_ctrl);
4892 assert(obj_buffer != NULL);
4893 buffer_store = obj_buffer->buffer_store;
4894 assert(size <= buffer_store->bo->size);
4895 if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897 i965_dri_object_to_buffer_gpe_resource(
4898 &avc_priv_surface->res_fei_mb_cntrl_surface,
4902 /* fei mv predictor surface*/
4903 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904 if (fei_param->mv_predictor_enable &&
4905 (fei_param->mv_predictor != VA_INVALID_ID)) {
4906 obj_buffer = BUFFER(fei_param->mv_predictor);
4907 assert(obj_buffer != NULL);
4908 buffer_store = obj_buffer->buffer_store;
4909 assert(size <= buffer_store->bo->size);
4910 if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911 i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912 i965_dri_object_to_buffer_gpe_resource(
4913 &avc_priv_surface->res_fei_mv_predictor_surface,
/* Elided else-branch: predictor enabled but no buffer is a caller error. */
4916 if (fei_param->mv_predictor_enable)
4917 assert(fei_param->mv_predictor != VA_INVALID_ID);
4920 /* fei vme distortion */
4921 size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922 if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923 i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924 if (fei_param->distortion != VA_INVALID_ID) {
4925 obj_buffer = BUFFER(fei_param->distortion);
4926 assert(obj_buffer != NULL);
4927 buffer_store = obj_buffer->buffer_store;
4928 assert(size <= buffer_store->bo->size);
4929 i965_dri_object_to_buffer_gpe_resource(
4930 &avc_priv_surface->res_fei_vme_distortion_surface,
4933 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934 &avc_priv_surface->res_fei_vme_distortion_surface,
4935 ALIGN(size, 0x1000),
4936 "fei vme distortion");
4937 assert(allocate_flag != 0);
4941 /* Fixme/Confirm: not sure why we need 3 byte padding here */
4942 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944 obj_buffer = BUFFER(fei_param->qp);
4945 assert(obj_buffer != NULL);
4946 buffer_store = obj_buffer->buffer_store;
4947 assert((size - 3) <= buffer_store->bo->size);
4948 if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950 i965_dri_object_to_buffer_gpe_resource(
4951 &avc_priv_surface->res_fei_mb_qp_surface,
4954 if (fei_param->mb_qp)
4955 assert(fei_param->qp != VA_INVALID_ID);
/* ---- Binding-table setup: everything below attaches the resources
 * prepared above to the kernel at fixed GEN9_AVC_MBENC_* slots. ---- */
4958 /*==== pak obj command buffer output ====*/
4959 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961 i965_add_buffer_gpe_surface(ctx,
4967 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4970 /*=== mv data buffer output */
4971 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973 i965_add_buffer_gpe_surface(ctx,
4979 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4982 /* === current input Y (binding table offset = 3)=== */
4983 obj_surface = encode_state->input_yuv_object;
4984 i965_add_2d_gpe_surface(ctx,
4989 I965_SURFACEFORMAT_R8_UNORM,
4990 GEN9_AVC_MBENC_CURR_Y_INDEX);
4992 /* === current input UV === (binding table offset == 4)*/
4993 i965_add_2d_gpe_surface(ctx,
4998 I965_SURFACEFORMAT_R16_UINT,
4999 GEN9_AVC_MBENC_CURR_UV_INDEX);
5001 /* === input current YUV surface, (binding table offset == 15) === */
5002 i965_add_adv_gpe_surface(ctx, gpe_context,
5004 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5007 /*== input current YUV surface, (binding table offset == 32)*/
5008 i965_add_adv_gpe_surface(ctx, gpe_context,
5010 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5012 /* list 0 references */
5013 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5015 surface_id = slice_param->RefPicList0[i].picture_id;
5016 obj_surface = SURFACE(surface_id);
/* Skip/stop handling for an invalid reference is elided in this listing. */
5017 if (!obj_surface || !obj_surface->private_data)
5019 i965_add_adv_gpe_surface(ctx, gpe_context,
5021 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5025 /* list 1 references */
5026 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027 if (i > 0) break; // only one ref supported here for B frame
5028 surface_id = slice_param->RefPicList1[i].picture_id;
5029 obj_surface = SURFACE(surface_id);
5030 if (!obj_surface || !obj_surface->private_data)
5033 i965_add_adv_gpe_surface(ctx, gpe_context,
5035 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
/* The first L1 reference also supplies its own MB code / MV data buffers
 * (used as the "FWD" colocated data for B-frame encoding). */
5037 avc_priv_surface = obj_surface->private_data;
5038 /* mb code of Backward reference frame */
5039 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040 gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041 i965_add_buffer_gpe_surface(ctx,
5047 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5049 /* mv data of backward ref frame */
5050 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051 gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052 i965_add_buffer_gpe_surface(ctx,
5058 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5062 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063 i965_add_adv_gpe_surface(ctx, gpe_context,
5065 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5069 /* as ref frame ,update later RefPicSelect of Current Picture*/
5070 obj_surface = encode_state->reconstructed_object;
5071 avc_priv_surface = obj_surface->private_data;
5072 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5077 I965_SURFACEFORMAT_R8_UNORM,
5078 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5083 /* mb specific data, macroblock control parameters */
5084 if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085 (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087 gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088 i965_add_buffer_gpe_surface(ctx,
5094 GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5097 /* multi mv predictor surface */
5098 if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099 size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100 gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101 i965_add_buffer_gpe_surface(ctx,
5107 GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
/* Per-MB QP input (note: binding size here is frame_mb_nums + 3, matching
 * the 3-byte padding used at allocation time above). */
5111 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112 size = frame_mb_nums + 3;
5113 gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114 i965_add_buffer_gpe_surface(ctx,
5120 GEN9_AVC_MBENC_MBQP_INDEX);
5124 /*=== FEI distortion surface ====*/
5125 size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126 gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127 i965_add_buffer_gpe_surface(ctx,
5133 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
/*
 * gen9_avc_kernel_mbenc - set up and launch the MBEnc kernel (or, when
 * i_frame_dist_in_use is set, the BRC I-frame distortion kernel).
 *
 * Chooses the kernel index and media state from kernel_mode / FEI mode,
 * fills a struct mbenc_param, programs CURBE and surfaces through the
 * generic_ctx callbacks, then dispatches a media-object walker whose
 * dependency pattern (26/45 degree) depends on the frame type.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): listing is partially elided (embedded line numbers jump),
 * so case bodies, breaks and closing braces are missing below.  Also,
 * "¶m" is HTML-entity mojibake of "&param" -- restore before compiling.
 */
5139 gen9_avc_kernel_mbenc(VADriverContextP ctx,
5140 struct encode_state *encode_state,
5141 struct intel_encoder_context *encoder_context,
5142 bool i_frame_dist_in_use)
5144 struct i965_driver_data *i965 = i965_driver_data(ctx);
5145 struct i965_gpe_table *gpe = &i965->gpe_table;
5146 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5147 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5148 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5149 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5150 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5152 struct i965_gpe_context *gpe_context;
5153 struct gpe_media_object_walker_parameter media_object_walker_param;
5154 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5155 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5156 int media_function = 0;
5158 unsigned int mb_const_data_buffer_in_use = 0;
5159 unsigned int mb_qp_buffer_in_use = 0;
5160 unsigned int brc_enabled = 0;
5161 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* "&& (0)" deliberately forces dirty-ROI off; kept as upstream wrote it. */
5162 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
5163 struct mbenc_param param ;
5165 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
5167 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Decide which optional kernel inputs are needed this frame. */
5169 mb_const_data_buffer_in_use =
5170 generic_state->mb_brc_enabled ||
5173 avc_state->mb_qp_data_enable ||
5174 avc_state->rolling_intra_refresh_enable;
5175 mb_qp_buffer_in_use =
5176 generic_state->mb_brc_enabled ||
5177 generic_state->brc_roi_enable ||
5178 avc_state->mb_qp_data_enable;
/* I-frame distortion mode runs on the 4x-downscaled surface via the
 * BRC kernel context; otherwise pick an MBEnc kernel by quality mode. */
5180 if (mbenc_i_frame_dist_in_use) {
5181 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
5182 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
5183 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
5184 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
5188 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
5190 switch (generic_state->kernel_mode) {
5191 case INTEL_ENC_KERNEL_NORMAL : {
5192 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5193 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
5196 case INTEL_ENC_KERNEL_PERFORMANCE : {
5197 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
5198 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
5201 case INTEL_ENC_KERNEL_QUALITY : {
5202 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
5203 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* FEI always uses the NORMAL media state with the FEI kernel binary. */
5211 if (encoder_context->fei_enabled) {
5212 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5213 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
/* P/B frame kernel offsets elided from this listing. */
5216 if (generic_state->frame_type == SLICE_TYPE_P) {
5218 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5222 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
5223 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
5224 mad_enable = avc_state->mad_enable;
5225 brc_enabled = generic_state->brc_enabled;
5227 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
/* NOTE(review): "¶m" is mangled "&param" (HTML entity) -- fix in source. */
5230 memset(¶m, 0, sizeof(struct mbenc_param));
5232 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
5233 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
5234 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
5235 param.mad_enable = mad_enable;
5236 param.brc_enabled = brc_enabled;
5237 param.roi_enabled = roi_enable;
5239 if (avc_state->mb_status_supported) {
5240 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
/* When BRC-update already wrote the CURBE, skip re-initializing it here. */
5243 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5244 gpe->context_init(ctx, gpe_context);
5247 gpe->reset_binding_table(ctx, gpe_context);
5249 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5251 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, ¶m);
5254 /* MB brc const data buffer set up*/
5255 if (mb_const_data_buffer_in_use) {
5256 // calculate the lambda table, it is kernel controlled trellis quantization,gen95+
5257 if (avc_state->lambda_table_enable)
5258 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
5260 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
5263 /*clear the mad buffer*/
5265 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
5268 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5270 gpe->setup_interface_data(ctx, gpe_context);
/* Walker configuration: full dependency pattern for real encode; the
 * I-frame distortion pass has no inter-MB dependency. */
5273 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5275 kernel_walker_param.use_scoreboard = 1;
5276 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5277 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5278 if (mbenc_i_frame_dist_in_use) {
5279 kernel_walker_param.no_dependency = 1;
5281 switch (generic_state->frame_type) {
5283 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5286 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5289 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5290 if (!slice_param->direct_spatial_mv_pred_flag) {
5291 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5297 kernel_walker_param.no_dependency = 0;
5300 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5302 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5305 &media_object_walker_param);
5306 return VA_STATUS_SUCCESS;
5310 me kernel related function
/*
 * gen9_avc_set_curbe_me - fill the HME (4x/16x/32x) ME kernel's CURBE.
 *
 * Starts from gen9_avc_me_curbe_init_data, then patches in the per-level
 * parameters (previous-step MV reuse, distortion writeout, shift factors),
 * the downscaled picture dimensions, QP, per-preset search path, and the
 * fixed binding-table surface indices.
 *
 * NOTE(review): listing partially elided (embedded line numbers jump):
 * switch breaks/default, scale_factor assignments and some dw32-dw37
 * surface-index stores are missing.  Code left byte-identical.
 */
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314 struct encode_state *encode_state,
5315 struct i965_gpe_context *gpe_context,
5316 struct intel_encoder_context *encoder_context,
5319 gen9_avc_me_curbe_data *curbe_cmd;
5320 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5324 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5326 struct me_param * curbe_param = (struct me_param *)param ;
5327 unsigned char use_mv_from_prev_step = 0;
5328 unsigned char write_distortions = 0;
5329 unsigned char qp_prime_y = 0;
5330 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5331 unsigned char seach_table_idx = 0;
5332 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334 unsigned int scale_factor = 0;
5336 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Per-HME-level knobs: 4x may seed from 16x results, 16x from 32x, and
 * only the 4x pass writes distortion surfaces. scale_factor assignments
 * are elided from this listing. */
5337 switch (curbe_param->hme_type) {
5338 case INTEL_ENC_HME_4x : {
5339 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340 write_distortions = 1;
5341 mv_shift_factor = 2;
5343 prev_mv_read_pos_factor = 0;
5346 case INTEL_ENC_HME_16x : {
5347 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348 write_distortions = 0;
5349 mv_shift_factor = 2;
5351 prev_mv_read_pos_factor = 1;
5354 case INTEL_ENC_HME_32x : {
5355 use_mv_from_prev_step = 0;
5356 write_distortions = 0;
5357 mv_shift_factor = 1;
5359 prev_mv_read_pos_factor = 0;
5366 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5371 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5374 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5376 curbe_cmd->dw3.sub_pel_mode = 3;
5377 if (avc_state->field_scaling_output_interleaved) {
5378 /*frame set to zero,field specified*/
5379 curbe_cmd->dw3.src_access = 0;
5380 curbe_cmd->dw3.ref_access = 0;
5381 curbe_cmd->dw7.src_field_polarity = 0;
5383 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5387 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388 curbe_cmd->dw6.write_distortions = write_distortions;
5389 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames switch to the B-frame search method/table and bi-weight. */
5392 if (generic_state->frame_type == SLICE_TYPE_B) {
5393 curbe_cmd->dw1.bi_weight = 32;
5394 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395 me_method = gen9_avc_b_me_method[generic_state->preset];
5396 seach_table_idx = 1;
5399 if (generic_state->frame_type == SLICE_TYPE_P ||
5400 generic_state->frame_type == SLICE_TYPE_B)
5401 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5403 curbe_cmd->dw13.ref_streamin_cost = 5;
5404 curbe_cmd->dw13.roi_enable = 0;
5406 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Copy the 14-dword search path selected by frame type and preset. */
5409 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5411 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5419 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_me - bind the ME kernel's surfaces for one HME level.
 *
 * Per hme_type (4x/16x/32x) this binds: the level's MV-data output buffer,
 * the next-coarser level's MV data as input (when that level is enabled),
 * distortion outputs (4x only), and the current + list-0/list-1 reference
 * downscaled YUV surfaces stored on each surface's gen9_surface_avc.
 *
 * NOTE(review): listing partially elided -- argument lines of the
 * i965_add_*_gpe_surface calls, loop guards/continues, breaks and closing
 * braces are missing.  Code left byte-identical; comments only.
 */
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425 struct encode_state *encode_state,
5426 struct i965_gpe_context *gpe_context,
5427 struct intel_encoder_context *encoder_context,
5430 struct i965_driver_data *i965 = i965_driver_data(ctx);
5432 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5437 struct object_surface *obj_surface, *input_surface;
5438 struct gen9_surface_avc *avc_priv_surface;
5439 struct i965_gpe_resource *gpe_resource;
5440 struct me_param * curbe_param = (struct me_param *)param ;
5442 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443 VASurfaceID surface_id;
5446 /* all scaled input surface stored in reconstructed_object*/
5447 obj_surface = encode_state->reconstructed_object;
5448 if (!obj_surface || !obj_surface->private_data)
5450 avc_priv_surface = obj_surface->private_data;
5453 switch (curbe_param->hme_type) {
5454 case INTEL_ENC_HME_4x : {
/* 4x MV output; 16x MV data becomes an input when 16x HME ran first. */
5456 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5460 I965_SURFACEFORMAT_R8_UNORM,
5461 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5464 if (generic_state->b16xme_enabled) {
5465 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5469 I965_SURFACEFORMAT_R8_UNORM,
5470 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5472 /* brc distortion output*/
5473 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5477 I965_SURFACEFORMAT_R8_UNORM,
5478 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479 /* memv distortion output*/
5480 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5484 I965_SURFACEFORMAT_R8_UNORM,
5485 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486 /*input current down scaled YUV surface*/
5487 obj_surface = encode_state->reconstructed_object;
5488 avc_priv_surface = obj_surface->private_data;
5489 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490 i965_add_adv_gpe_surface(ctx, gpe_context,
5492 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493 /*input ref scaled YUV surface*/
5494 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495 surface_id = slice_param->RefPicList0[i].picture_id;
5496 obj_surface = SURFACE(surface_id);
5497 if (!obj_surface || !obj_surface->private_data)
5499 avc_priv_surface = obj_surface->private_data;
5501 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5503 i965_add_adv_gpe_surface(ctx, gpe_context,
5505 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
/* Current scaled surface is bound again as the BWD-ref "current". */
5508 obj_surface = encode_state->reconstructed_object;
5509 avc_priv_surface = obj_surface->private_data;
5510 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5512 i965_add_adv_gpe_surface(ctx, gpe_context,
5514 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5516 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517 surface_id = slice_param->RefPicList1[i].picture_id;
5518 obj_surface = SURFACE(surface_id);
5519 if (!obj_surface || !obj_surface->private_data)
5521 avc_priv_surface = obj_surface->private_data;
5523 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5525 i965_add_adv_gpe_surface(ctx, gpe_context,
5527 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5532 case INTEL_ENC_HME_16x : {
/* 16x MV output; 32x MV data is input when 32x HME is enabled. */
5533 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5537 I965_SURFACEFORMAT_R8_UNORM,
5538 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5540 if (generic_state->b32xme_enabled) {
5541 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5545 I965_SURFACEFORMAT_R8_UNORM,
5546 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5549 obj_surface = encode_state->reconstructed_object;
5550 avc_priv_surface = obj_surface->private_data;
5551 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552 i965_add_adv_gpe_surface(ctx, gpe_context,
5554 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5556 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557 surface_id = slice_param->RefPicList0[i].picture_id;
5558 obj_surface = SURFACE(surface_id);
5559 if (!obj_surface || !obj_surface->private_data)
5561 avc_priv_surface = obj_surface->private_data;
5563 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5565 i965_add_adv_gpe_surface(ctx, gpe_context,
5567 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5570 obj_surface = encode_state->reconstructed_object;
5571 avc_priv_surface = obj_surface->private_data;
5572 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5574 i965_add_adv_gpe_surface(ctx, gpe_context,
5576 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5578 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579 surface_id = slice_param->RefPicList1[i].picture_id;
5580 obj_surface = SURFACE(surface_id);
5581 if (!obj_surface || !obj_surface->private_data)
5583 avc_priv_surface = obj_surface->private_data;
5585 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5587 i965_add_adv_gpe_surface(ctx, gpe_context,
5589 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5593 case INTEL_ENC_HME_32x : {
/* Coarsest level: 32x MV output only, no coarser input exists. */
5594 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5598 I965_SURFACEFORMAT_R8_UNORM,
5599 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5601 obj_surface = encode_state->reconstructed_object;
5602 avc_priv_surface = obj_surface->private_data;
5603 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604 i965_add_adv_gpe_surface(ctx, gpe_context,
5606 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5608 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609 surface_id = slice_param->RefPicList0[i].picture_id;
5610 obj_surface = SURFACE(surface_id);
5611 if (!obj_surface || !obj_surface->private_data)
5613 avc_priv_surface = obj_surface->private_data;
5615 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5617 i965_add_adv_gpe_surface(ctx, gpe_context,
5619 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5622 obj_surface = encode_state->reconstructed_object;
5623 avc_priv_surface = obj_surface->private_data;
5624 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5626 i965_add_adv_gpe_surface(ctx, gpe_context,
5628 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5630 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631 surface_id = slice_param->RefPicList1[i].picture_id;
5632 obj_surface = SURFACE(surface_id);
5633 if (!obj_surface || !obj_surface->private_data)
5635 avc_priv_surface = obj_surface->private_data;
5637 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5639 i965_add_adv_gpe_surface(ctx, gpe_context,
5641 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/*
 * gen9_avc_kernel_me - run the HME kernel at one level (4x/16x/32x).
 *
 * Picks the media state and scale factor from hme_type, the P/B kernel
 * from the frame type (I frames never reach this path), programs CURBE
 * and surfaces via generic_ctx callbacks, and dispatches a dependency-free
 * media-object walker over the downscaled MB grid.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): listing partially elided (switch head, scale_factor
 * assignments, breaks and braces missing); "¶m" below is HTML-entity
 * mojibake of "&param" -- restore before compiling.
 */
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653 struct encode_state *encode_state,
5654 struct intel_encoder_context *encoder_context,
5657 struct i965_driver_data *i965 = i965_driver_data(ctx);
5658 struct i965_gpe_table *gpe = &i965->gpe_table;
5659 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5664 struct i965_gpe_context *gpe_context;
5665 struct gpe_media_object_walker_parameter media_object_walker_param;
5666 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668 int media_function = 0;
5670 struct me_param param ;
5671 unsigned int scale_factor = 0;
/* switch (hme_type) head elided in this listing. */
5674 case INTEL_ENC_HME_4x : {
5675 media_function = INTEL_MEDIA_STATE_4X_ME;
5679 case INTEL_ENC_HME_16x : {
5680 media_function = INTEL_MEDIA_STATE_16X_ME;
5684 case INTEL_ENC_HME_32x : {
5685 media_function = INTEL_MEDIA_STATE_32X_ME;
5694 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5697 /* I frame should not come here.*/
5698 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5701 gpe->context_init(ctx, gpe_context);
5702 gpe->reset_binding_table(ctx, gpe_context);
/* NOTE(review): "¶m" is mangled "&param" on the next three calls. */
5705 memset(¶m, 0, sizeof(param));
5706 param.hme_type = hme_type;
5707 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, ¶m);
5710 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5712 gpe->setup_interface_data(ctx, gpe_context);
5714 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715 /* the scaling is based on 8x8 blk level */
5716 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5718 kernel_walker_param.no_dependency = 1;
5720 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5722 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5725 &media_object_walker_param);
5727 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_set_curbe_wp - fill the weighted-prediction kernel's CURBE.
 *
 * Zeroes the CURBE, then loads the default luma weight/offset from the
 * slice header for the list selected by curbe_param->ref_list_idx
 * (0 -> RefPicList0, nonzero -> RefPicList1), and sets the fixed
 * input/output surface binding indices.
 *
 * NOTE(review): listing partially elided -- the NULL-check/early-return
 * after map_curbe and the else/closing braces are missing.  Code left
 * byte-identical.
 */
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735 struct encode_state *encode_state,
5736 struct i965_gpe_context *gpe_context,
5737 struct intel_encoder_context *encoder_context,
5740 gen9_avc_wp_curbe_data *cmd;
5741 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744 struct wp_param * curbe_param = (struct wp_param *)param;
5746 cmd = i965_gpe_context_map_curbe(gpe_context);
5750 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5751 if (curbe_param->ref_list_idx) {
5752 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5755 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5759 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5762 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_wp - bind the weighted-prediction kernel surfaces.
 *
 * Looks up the first reference of the list selected by
 * curbe_param->ref_list_idx, records whether weighted prediction can be
 * enabled for that list in avc_state (weighted_ref_l0/l1_enable), and
 * binds the reference input and the per-list scaled output surface.
 *
 * NOTE(review): listing partially elided -- else branches / braces and
 * the argument lines of i965_add_adv_gpe_surface are missing; the
 * rebinding of obj_surface at original line 5798 may belong to an elided
 * fallback path.  Code left byte-identical.
 */
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768 struct encode_state *encode_state,
5769 struct i965_gpe_context *gpe_context,
5770 struct intel_encoder_context *encoder_context,
5773 struct i965_driver_data *i965 = i965_driver_data(ctx);
5774 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777 struct wp_param * curbe_param = (struct wp_param *)param;
5778 struct object_surface *obj_surface;
5779 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780 VASurfaceID surface_id;
5782 if (curbe_param->ref_list_idx) {
5783 surface_id = slice_param->RefPicList1[0].picture_id;
5784 obj_surface = SURFACE(surface_id);
5785 if (!obj_surface || !obj_surface->private_data)
5786 avc_state->weighted_ref_l1_enable = 0;
5788 avc_state->weighted_ref_l1_enable = 1;
5790 surface_id = slice_param->RefPicList0[0].picture_id;
5791 obj_surface = SURFACE(surface_id);
5792 if (!obj_surface || !obj_surface->private_data)
5793 avc_state->weighted_ref_l0_enable = 0;
5795 avc_state->weighted_ref_l0_enable = 1;
5798 obj_surface = encode_state->reference_objects[0];
/* Reference picture in, weighted/scaled picture out. */
5801 i965_add_adv_gpe_surface(ctx, gpe_context,
5803 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5805 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806 i965_add_adv_gpe_surface(ctx, gpe_context,
5808 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
/* gen9_avc_kernel_wp: run the weighted-prediction (WP) scaling kernel for
 * one reference list.
 *
 * @param list1_in_use  non-zero selects reference list 1, otherwise list 0;
 *                      forwarded to the CURBE/surface hooks via wp_param.
 *
 * Flow: pick the WP GPE context, program CURBE and surfaces through the
 * generic_ctx function pointers, then launch a media-object walker sized
 * in macroblock units with no inter-thread dependency.
 *
 * Fix: the three '&param' argument expressions had been mangled into the
 * mojibake sequence '¶m' (an HTML '&para;' encoding artifact), which does
 * not compile; restored to '&param'.
 */
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814 struct encode_state *encode_state,
5815 struct intel_encoder_context *encoder_context,
5816 unsigned int list1_in_use)
5818 struct i965_driver_data *i965 = i965_driver_data(ctx);
5819 struct i965_gpe_table *gpe = &i965->gpe_table;
5820 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5825 struct i965_gpe_context *gpe_context;
5826 struct gpe_media_object_walker_parameter media_object_walker_param;
5827 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828 int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829 struct wp_param param;
5831 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5833 gpe->context_init(ctx, gpe_context);
5834 gpe->reset_binding_table(ctx, gpe_context);
5836 memset(&param, 0, sizeof(param));
5837 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5839 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5842 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5844 gpe->setup_interface_data(ctx, gpe_context);
5846 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847 /* walker resolution is in MB units (the original "8x8 blk" comment
 * appears copy-pasted from the scaling kernel) */
5848 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850 kernel_walker_param.no_dependency = 1;
5852 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5854 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5857 &media_object_walker_param);
5859 return VA_STATUS_SUCCESS;
5864 SFD (static frame detection) related functions
/* gen9_avc_set_curbe_sfd: program the CURBE for the static frame detection
 * (SFD) kernel.  Sets the detection thresholds (large-MV count, zero-MV
 * count, minimum distortion), frame geometry in MBs, slice type and QP,
 * copies the per-QP intra cost table for P or B frames, and wires up the
 * BTI surface indices. */
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868 struct encode_state *encode_state,
5869 struct i965_gpe_context *gpe_context,
5870 struct intel_encoder_context *encoder_context,
5873 gen9_avc_sfd_curbe_data *cmd;
5874 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5879 cmd = i965_gpe_context_map_curbe(gpe_context);
5883 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5885 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887 cmd->dw0.stream_in_type = 7 ;
5888 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
5889 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890 cmd->dw0.vdenc_mode_disable = 1 ;
5892 cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5896 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
/* thresholds are expressed as a percentage of the frame's MB count */
5899 cmd->dw3.large_mv_threshold = 128 ;
5900 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901 cmd->dw5.zmv_threshold = 4 ;
5902 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903 cmd->dw7.min_dist_threshold = 10 ;
/* per-QP cost table depends on the inter slice type (P vs B) */
5905 if (generic_state->frame_type == SLICE_TYPE_P) {
5906 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5908 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5912 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
/* BTI slots consumed by the SFD kernel */
5914 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5920 i965_gpe_context_unmap_curbe(gpe_context);
/* gen9_avc_send_surface_sfd: bind the SFD kernel's inputs -- the 4x HME MV
 * data and MV distortion 2D buffers -- and its output data buffer. */
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926 struct encode_state *encode_state,
5927 struct i965_gpe_context *gpe_context,
5928 struct intel_encoder_context *encoder_context,
5931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933 struct i965_gpe_resource *gpe_resource;
5936 /*HME mv data surface memv output 4x*/
5937 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5941 I965_SURFACEFORMAT_R8_UNORM,
5942 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5944 /* memv distortion */
5945 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5949 I965_SURFACEFORMAT_R8_UNORM,
5950 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
/* SFD result buffer (linear, not 2D) */
5953 gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954 i965_add_buffer_gpe_surface(ctx,
5960 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
/* gen9_avc_kernel_sfd: run the static-frame-detection kernel as a single
 * media object (no walker): init/reset the GPE context, program CURBE and
 * surfaces through the generic hooks, then dispatch with zeroed inline
 * data. */
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966 struct encode_state *encode_state,
5967 struct intel_encoder_context *encoder_context)
5969 struct i965_driver_data *i965 = i965_driver_data(ctx);
5970 struct i965_gpe_table *gpe = &i965->gpe_table;
5971 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5975 struct i965_gpe_context *gpe_context;
5976 struct gpe_media_object_parameter media_object_param;
5977 struct gpe_media_object_inline_data media_object_inline_data;
5978 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5981 gpe->context_init(ctx, gpe_context);
5982 gpe->reset_binding_table(ctx, gpe_context);
/* CURBE and surfaces take no extra parameter for SFD, hence NULL */
5985 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5988 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5990 gpe->setup_interface_data(ctx, gpe_context);
5992 memset(&media_object_param, 0, sizeof(media_object_param));
5993 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994 media_object_param.pinline_data = &media_object_inline_data;
5995 media_object_param.inline_size = sizeof(media_object_inline_data);
5997 gen9_avc_run_kernel_media_object(ctx, encoder_context,
6000 &media_object_param);
6002 return VA_STATUS_SUCCESS;
6005 /**************** PreEnc Scaling *************************************/
6006 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6007 * function to set preenc scaling curbe is the same one used for avc encode
6008 == gen95_avc_set_curbe_scaling4x()
6009 * function to send buffer/surface resources is the same one used for avc encode
6010 == gen9_avc_send_surface_scaling()
/* gen9_avc_preenc_kernel_scaling: run 4x downscaling for FEI PreEnc on one
 * of three sources selected by scale_surface_type -- the current input
 * picture, the past reference or the future reference -- optionally
 * emitting MB-level statistics (variance / pixel average / flatness) into
 * the matching statistics buffer.  The walker runs at 8x8-block
 * granularity (2x the downscaled MB dimensions).
 * NOTE(review): extraction fragment -- the declaration of kernel_idx, the
 * first case label of the switch and several 'else'/'break' lines are not
 * visible in this chunk. */
6013 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6014 struct encode_state *encode_state,
6015 struct intel_encoder_context *encoder_context,
6017 int scale_surface_type)
6019 struct i965_driver_data *i965 = i965_driver_data(ctx);
6020 struct i965_gpe_table *gpe = &i965->gpe_table;
6021 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6022 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6023 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6024 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6025 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6026 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6027 VAStatsStatisticsParameter *stat_param = NULL;
6028 struct i965_gpe_context *gpe_context;
6029 struct scaling_param surface_param;
6030 struct object_surface *obj_surface = NULL;
6031 struct gpe_media_object_walker_parameter media_object_walker_param;
6032 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6033 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6034 int media_function = 0;
6036 int enable_statistics_output;
6038 stat_param_h264 = avc_state->stat_param;
6039 assert(stat_param_h264);
6040 stat_param = &stat_param_h264->stats_params;
/* statistics are produced unless the app explicitly disabled them */
6041 enable_statistics_output = !stat_param_h264->disable_statistics_output;
/* PreEnc always uses the 4x scaling kernel (no 16x/32x stages here) */
6043 memset(&surface_param, 0, sizeof(struct scaling_param));
6044 media_function = INTEL_MEDIA_STATE_4X_SCALING;
6045 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6046 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6047 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6049 surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6050 surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6051 surface_param.output_frame_width = generic_state->frame_width_4x;
6052 surface_param.output_frame_height = generic_state->frame_height_4x;
6053 surface_param.use_4x_scaling = 1 ;
6054 surface_param.use_16x_scaling = 0 ;
6055 surface_param.use_32x_scaling = 0 ;
6056 surface_param.enable_mb_flatness_check = enable_statistics_output;
6057 surface_param.enable_mb_variance_output = enable_statistics_output;
6058 surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6059 surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
/* choose input/output surfaces and the statistics destination buffer */
6061 switch (scale_surface_type) {
6064 surface_param.input_surface = encode_state->input_yuv_object ;
6065 surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6067 if (enable_statistics_output) {
6068 surface_param.pres_mbv_proc_stat_buffer =
6069 &avc_ctx->preproc_stat_data_out_buffer;
6070 surface_param.mbv_proc_stat_enabled = 1;
6072 surface_param.mbv_proc_stat_enabled = 0;
6073 surface_param.pres_mbv_proc_stat_buffer = NULL;
6077 case SCALE_PAST_REF_PIC:
6078 obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6079 assert(obj_surface);
6080 surface_param.input_surface = obj_surface;
6081 surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
/* reference statistics only when the app supplied a stat buffer */
6083 if (stat_param->past_ref_stat_buf) {
6084 surface_param.pres_mbv_proc_stat_buffer =
6085 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6086 surface_param.mbv_proc_stat_enabled = 1;
6088 surface_param.mbv_proc_stat_enabled = 0;
6089 surface_param.pres_mbv_proc_stat_buffer = NULL;
6093 case SCALE_FUTURE_REF_PIC:
6095 obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6096 assert(obj_surface);
6097 surface_param.input_surface = obj_surface;
6098 surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6100 if (stat_param->future_ref_stat_buf) {
6101 surface_param.pres_mbv_proc_stat_buffer =
6102 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6103 surface_param.mbv_proc_stat_enabled = 1;
6105 surface_param.mbv_proc_stat_enabled = 0;
6106 surface_param.pres_mbv_proc_stat_buffer = NULL;
6113 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6115 gpe->context_init(ctx, gpe_context);
6116 gpe->reset_binding_table(ctx, gpe_context);
6118 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* surface-format flags apply to the surface setup, not the CURBE */
6120 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6121 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6123 /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6124 * VAStatsStatisticsH264 will be used to store the output. */
6125 surface_param.enable_mb_flatness_check = 0;
6126 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6128 /* setup the interface data */
6129 gpe->setup_interface_data(ctx, gpe_context);
6131 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6132 /* the scaling is based on 8x8 blk level */
6133 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6134 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6135 kernel_walker_param.no_dependency = 1;
6137 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6139 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6142 &media_object_walker_param);
6144 return VA_STATUS_SUCCESS;
6147 /**************** PreEnc HME *************************************/
6148 /* function to run preenc hme is the same one we use in avc encode:
6149 == gen9_avc_kernel_me()
6150 * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
6151 * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
/* gen9_avc_preenc_set_curbe_me: fill the CURBE for the PreEnc HME kernel
 * from the FEI statistics parameters: downscaled picture geometry, sub-pel
 * mode, frame QP, reference counts, the search-path table (B-frame table
 * when frame_type is B) and the ME surface BTI indices.
 * NOTE(review): extraction fragment -- the lines that set scale_factor and
 * any non-4x hme_type cases are not visible here; PreEnc elsewhere asserts
 * 4x HME only. */
6154 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6155 struct encode_state *encode_state,
6156 struct i965_gpe_context *gpe_context,
6157 struct intel_encoder_context *encoder_context,
6160 gen9_avc_fei_me_curbe_data *curbe_cmd;
6161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6162 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6163 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6164 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6165 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6167 struct me_param * curbe_param = (struct me_param *)param ;
6168 unsigned char use_mv_from_prev_step = 0;
6169 unsigned char write_distortions = 0;
6170 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6171 unsigned char seach_table_idx = 0;
6172 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6173 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6174 unsigned int scale_factor = 0;
/* 4x HME: no coarser-level MV input, no distortion write-out */
6176 switch (curbe_param->hme_type) {
6177 case INTEL_ENC_HME_4x:
6178 use_mv_from_prev_step = 0;
6179 write_distortions = 0;
6180 mv_shift_factor = 2;
6182 prev_mv_read_pos_factor = 0;
6189 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* downscaled geometry in MBs, rounded up to a 16-pixel multiple */
6193 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6194 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6196 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6198 curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6199 if (avc_state->field_scaling_output_interleaved) {
6200 /*frame set to zero,field specified*/
6201 curbe_cmd->dw3.src_access = 0;
6202 curbe_cmd->dw3.ref_access = 0;
6203 curbe_cmd->dw7.src_field_polarity = 0;
6205 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6206 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6207 curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6209 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6210 curbe_cmd->dw6.write_distortions = write_distortions;
6211 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6212 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
/* B frames: equal bi-prediction weight, L1 refs, B-specific search table */
6214 if (generic_state->frame_type == SLICE_TYPE_B) {
6215 curbe_cmd->dw1.bi_weight = 32;
6216 curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6217 me_method = gen9_avc_b_me_method[generic_state->preset];
6218 seach_table_idx = 1;
6221 if (generic_state->frame_type == SLICE_TYPE_P ||
6222 generic_state->frame_type == SLICE_TYPE_B)
6223 curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6225 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6226 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6228 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* BTI slots consumed by the ME kernel */
6230 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6231 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6232 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6233 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6234 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6235 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6236 curbe_cmd->dw38.reserved = 0;
6238 i965_gpe_context_unmap_curbe(gpe_context);
/* gen9_avc_preenc_send_surface_me: bind the 4x ME kernel surfaces for
 * PreEnc: the MV data, MV distortion and BRC distortion outputs, then the
 * VME surface pairs -- the current 4x-downscaled picture alternating with
 * the past (forward) and future (backward) reference 4x-downscaled
 * pictures.  PreEnc supports only INTEL_ENC_HME_4x (asserted below). */
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244 struct encode_state *encode_state,
6245 struct i965_gpe_context *gpe_context,
6246 struct intel_encoder_context *encoder_context,
6249 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254 struct object_surface *input_surface;
6255 struct i965_gpe_resource *gpe_resource;
6256 struct me_param * curbe_param = (struct me_param *)param ;
6259 /* PreEnc Only supports 4xme */
6260 assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6262 switch (curbe_param->hme_type) {
6263 case INTEL_ENC_HME_4x : {
/* MV data output */
6265 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6269 I965_SURFACEFORMAT_R8_UNORM,
6270 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6272 /* memv distortion output*/
6273 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6277 I965_SURFACEFORMAT_R8_UNORM,
6278 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6280 /* brc distortion output*/
6281 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6285 I965_SURFACEFORMAT_R8_UNORM,
6286 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6288 /* input past ref scaled YUV surface*/
/* BTI layout: even slot = current picture, odd slot = reference */
6289 for (i = 0; i < stat_param->num_past_references; i++) {
6290 /*input current down scaled YUV surface for forward refef */
6291 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292 i965_add_adv_gpe_surface(ctx, gpe_context,
6294 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6296 input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297 i965_add_adv_gpe_surface(ctx, gpe_context,
6299 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6302 /* input future ref scaled YUV surface*/
6303 for (i = 0; i < stat_param->num_future_references; i++) {
6304 /*input current down scaled YUV surface for backward ref */
6305 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306 i965_add_adv_gpe_surface(ctx, gpe_context,
6308 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6310 input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311 i965_add_adv_gpe_surface(ctx, gpe_context,
6313 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6324 /**************** PreEnc PreProc *************************************/
6325 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6326 * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6327 * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
/* gen9_avc_preenc_set_curbe_preproc: build the CURBE for the FEI PreEnc
 * preprocessing kernel from VAStatsStatisticsParameterH264:
 * - loads the per-slice-type init table (I/P/B);
 * - derives me_method / ref_width / ref_height / len_sp from search_window
 *   (or validates the caller-supplied values when search_window == 0);
 * - programs per-frame controls (QP, SAD types, HME, MV output,
 *   statistics) and the binding-table surface indices.
 *
 * Fix: the second slice-type branch near the end tested SLICE_TYPE_P a
 * second time, so the branch commented "B slice" was unreachable dead code
 * and B frames never received their search_ctrl/skip_type/skip_center_mask
 * setup; it now tests SLICE_TYPE_B as the comment intends.
 */
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331 struct encode_state *encode_state,
6332 struct i965_gpe_context *gpe_context,
6333 struct intel_encoder_context *encoder_context,
6336 gen9_avc_preproc_curbe_data *cmd;
6337 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342 unsigned char me_method = 0;
6343 unsigned int table_idx = 0;
6344 int ref_width, ref_height, len_sp;
6345 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347 unsigned int preset = generic_state->preset;
6349 cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6352 memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
/* start from the per-slice-type default CURBE image */
6354 switch (generic_state->frame_type) {
6356 memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357 sizeof(gen9_avc_preproc_curbe_data));
6360 memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361 sizeof(gen9_avc_preproc_curbe_data));
6364 memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365 sizeof(gen9_avc_preproc_curbe_data));
6370 /* 4 means full search, 6 means diamond search */
6371 me_method = (stat_param_h264->search_window == 5) ||
6372 (stat_param_h264->search_window == 8) ? 4 : 6;
6374 ref_width = stat_param_h264->ref_width;
6375 ref_height = stat_param_h264->ref_height;
6376 len_sp = stat_param_h264->len_sp;
6377 /* If there is a search_window, discard user provided ref_width, ref_height
6378 * and search_path length */
6379 switch (stat_param_h264->search_window) {
6381 /* not use predefined search window, there should be a search_path input */
6382 if ((stat_param_h264->search_path != 0) &&
6383 (stat_param_h264->search_path != 1) &&
6384 (stat_param_h264->search_path != 2)) {
6385 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
6388 /* 4 means full search, 6 means diamond search */
6389 me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391 WARN_ONCE("Invalid input ref_width/ref_height in"
6392 "SearchWindow=0 case! \n");
6398 /* Tiny - 4 SUs 24x24 window */
6405 /* Small - 9 SUs 28x28 window */
6411 /* Diamond - 16 SUs 48x40 window */
6417 /* Large Diamond - 32 SUs 48x40 window */
6423 /* Exhaustive - 48 SUs 48x40 window */
6429 /* Diamond - 16 SUs 64x32 window */
6435 /* Large Diamond - 32 SUs 64x32 window */
6441 /* Exhaustive - 48 SUs 64x32 window */
6451 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6453 CLIP(ref_width, 4, 32);
6454 CLIP(ref_height, 4, 32);
6455 } else if (is_pframe) {
6456 CLIP(ref_width, 4, 64);
6457 CLIP(ref_height, 4, 32);
6460 cmd->dw0.adaptive_enable =
6461 cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462 cmd->dw2.max_len_sp = len_sp;
6463 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465 cmd->dw3.src_access =
6466 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is supported
/* forward-transform quantization only makes sense for inter frames */
6468 if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469 cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6471 cmd->dw3.ft_enable = 0;
6473 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474 cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475 cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476 cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477 cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478 cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479 cmd->dw4.hme_enable = generic_state->hme_enabled;
6480 cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481 cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
/* NOTE(review): this enables MV predictors only for I frames; predictors
 * are normally an inter-search aid, so the '!=' looks suspicious -- confirm
 * against the kernel interface before changing (left as-is). */
6483 cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484 (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
/* I frames produce no motion vectors at all */
6486 cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487 cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6489 cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490 cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6492 cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
/* mode/MV cost table selected by kernel slice type and frame QP */
6495 memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6497 /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498 memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6500 /* search path tables */
6501 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
/* 0x07 = all of 16x16/8x8/4x4 intra partitions allowed */
6504 if (stat_param_h264->intra_part_mask == 0x07)
6505 cmd->dw31.intra_compute_type = 3;
6507 cmd->dw38.ref_threshold = 400;
6508 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
/* per-slice-type motion search setup */
6510 if (generic_state->frame_type == SLICE_TYPE_I) {
6511 cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512 cmd->dw36.hme_combine_overlap = 0;
6513 } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515 cmd->dw3.bme_disable_fbr = 1;
6516 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518 cmd->dw7.non_skip_zmv_added = 1;
6519 cmd->dw7.non_skip_mode_added = 1;
6520 cmd->dw7.skip_center_mask = 1;
6521 cmd->dw32.max_vmv_r =
6522 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523 cmd->dw36.hme_combine_overlap = 1;
6525 } else if (generic_state->frame_type == SLICE_TYPE_B) { /* B slice */
6527 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528 cmd->dw3.search_ctrl = 0;
6529 cmd->dw3.skip_type = 1;
6530 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532 cmd->dw7.skip_center_mask = 0xff;
6533 cmd->dw32.max_vmv_r =
6534 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535 cmd->dw36.hme_combine_overlap = 1;
/* BTI slots consumed by the preproc kernel */
6538 cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539 cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540 cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541 cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542 cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543 cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544 cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545 cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546 cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6548 i965_gpe_context_unmap_curbe(gpe_context);
/* gen9_avc_preenc_send_surface_preproc: bind every surface the PreEnc
 * preprocessing kernel reads and writes: the source Y and UV planes,
 * optional HME MV data, optional MV-predictor and per-MB QP inputs (the
 * QP path also binds the FTQ LUT), the MV and statistics output buffers,
 * the repeated VME current-picture pair, and at most one past plus one
 * future reference picture. */
6552 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6553 struct encode_state *encode_state,
6554 struct i965_gpe_context *gpe_context,
6555 struct intel_encoder_context *encoder_context,
6558 struct i965_driver_data *i965 = i965_driver_data(ctx);
6559 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6560 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6561 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6562 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6563 struct object_surface *obj_surface;
6564 struct i965_gpe_resource *gpe_resource;
6565 VASurfaceID surface_id;
6566 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6567 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6568 unsigned int size = 0, frame_mb_nums = 0;
6570 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6572 /* input yuv surface, Y index */
6573 obj_surface = encode_state->input_yuv_object;
6574 i965_add_2d_gpe_surface(ctx,
6579 I965_SURFACEFORMAT_R8_UNORM,
6580 GEN9_AVC_PREPROC_CURR_Y_INDEX);
6582 /* input yuv surface, UV index */
6583 i965_add_2d_gpe_surface(ctx,
6588 I965_SURFACEFORMAT_R16_UINT,
6589 GEN9_AVC_MBENC_CURR_UV_INDEX);
6592 if (generic_state->hme_enabled) {
6593 /* HME mv data buffer */
6594 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6598 I965_SURFACEFORMAT_R8_UNORM,
6599 GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6602 /* mv predictor buffer */
6603 if (stat_param_h264->mv_predictor_ctrl) {
6604 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6605 gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6606 i965_add_buffer_gpe_surface(ctx,
6612 GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
/* per-MB QP input; the FTQ lookup table accompanies it */
6616 if (stat_param_h264->mb_qp) {
6617 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6618 gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6619 i965_add_buffer_gpe_surface(ctx,
6625 GEN9_AVC_PREPROC_MBQP_INDEX);
6627 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6628 size = 16 * AVC_QP_MAX * 4;
6629 i965_add_buffer_gpe_surface(ctx,
6635 GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6639 /* mv data output buffer */
6640 if (!stat_param_h264->disable_mv_output) {
6641 gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6642 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6643 i965_add_buffer_gpe_surface(ctx,
6649 GEN9_AVC_PREPROC_MV_DATA_INDEX);
6652 /* statistics output buffer */
6653 if (!stat_param_h264->disable_statistics_output) {
6654 gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6655 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6656 i965_add_buffer_gpe_surface(ctx,
6662 GEN9_AVC_PREPROC_MB_STATS_INDEX);
/* VME current picture, bound twice per the kernel's BTI layout */
6666 obj_surface = encode_state->input_yuv_object;
6667 i965_add_2d_gpe_surface(ctx,
6672 I965_SURFACEFORMAT_R8_UNORM,
6673 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6675 /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
6676 obj_surface = encode_state->input_yuv_object;
6677 i965_add_2d_gpe_surface(ctx,
6682 I965_SURFACEFORMAT_R8_UNORM,
6683 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6685 /* vme forward ref */
6686 /* Only supports one past ref */
6687 if (stat_param->num_past_references > 0) {
6688 surface_id = stat_param->past_references[0].picture_id;
6689 assert(surface_id != VA_INVALID_ID);
6690 obj_surface = SURFACE(surface_id);
6693 i965_add_adv_gpe_surface(ctx, gpe_context,
6695 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6699 /* vme future ref */
6700 /* Only supports one future ref */
6701 if (stat_param->num_future_references > 0) {
6702 surface_id = stat_param->future_references[0].picture_id;
6703 assert(surface_id != VA_INVALID_ID);
6704 obj_surface = SURFACE(surface_id);
6707 i965_add_adv_gpe_surface(ctx, gpe_context,
6709 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
/* same future reference bound a second time for the second BWD slot */
6711 surface_id = stat_param->future_references[0].picture_id;
6712 assert(surface_id != VA_INVALID_ID);
6713 obj_surface = SURFACE(surface_id);
6716 i965_add_adv_gpe_surface(ctx, gpe_context,
6718 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
/*
 * gen9_avc_preenc_kernel_preproc:
 * Dispatch the FEI PreEnc "preproc" media kernel for the current frame.
 * Resets the GPE context/binding table, programs the CURBE and surface
 * state via the generic-context hooks, optionally uploads the FTQ LUT
 * when per-MB QP is in use, and launches a media-object walker with one
 * thread per macroblock (no inter-thread dependency).
 *
 * Returns VA_STATUS_SUCCESS on completion.
 *
 * NOTE(review): this is an excerpt — some interior lines (return type,
 * braces, unmap of the mapped resource) are elided from this listing.
 */
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727 struct encode_state *encode_state,
6728 struct intel_encoder_context *encoder_context)
6730 struct i965_driver_data *i965 = i965_driver_data(ctx);
6731 struct i965_gpe_table *gpe = &i965->gpe_table;
6732 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738 struct i965_gpe_context *gpe_context;
6739 struct gpe_media_object_walker_parameter media_object_walker_param;
6740 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741 int media_function = INTEL_MEDIA_STATE_PREPROC;
6742 struct i965_gpe_resource *gpe_resource = NULL;
6743 unsigned int * data = NULL;
/* 16 DWs of FTQ LUT data per QP value, for all 52 H.264 QPs. */
6744 unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
/* Re-initialize the preproc GPE context and clear its binding table
 * before programming this frame's state. */
6746 gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747 gpe->context_init(ctx, gpe_context);
6748 gpe->reset_binding_table(ctx, gpe_context);
/* Program CURBE and surface state through the generic-context hooks. */
6751 generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6754 generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6756 gpe->setup_interface_data(ctx, gpe_context);
6758 /* Set up FtqLut Buffer if there is QP change within a frame */
6759 if (stat_param_h264->mb_qp) {
6760 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761 assert(gpe_resource);
6762 data = i965_map_gpe_resource(gpe_resource);
/* NOTE(review): the matching unmap of this resource is not visible
 * here — presumably in lines elided from this listing; confirm. */
6764 memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
/* One walker thread per MB; preproc MBs have no dependency on each
 * other, so the walker runs fully parallel. */
6767 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770 kernel_walker_param.no_dependency = 1;
6772 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6774 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6777 &media_object_walker_param);
6779 return VA_STATUS_SUCCESS;
/*
 * gen8_avc_set_curbe_mbenc:
 * Fill the Gen8 MBENC kernel CURBE (constant buffer) for the current
 * frame.  Starts from a per-frame-type init table, then overrides
 * individual DWs: ME search control, slice QP, FTQ/skip thresholds,
 * multi-reference prediction, rolling intra refresh, ROI rectangles,
 * and finally the surface binding-table indices the kernel reads.
 *
 * gpe_context - GPE context whose CURBE is mapped and written;
 * param       - struct mbenc_param (I-frame-distortion mode flag etc.).
 *
 * NOTE(review): this is an excerpt — case labels, braces and some
 * branch bodies are elided from this listing.
 */
6784 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
6785 struct encode_state *encode_state,
6786 struct i965_gpe_context *gpe_context,
6787 struct intel_encoder_context *encoder_context,
6790 struct i965_driver_data *i965 = i965_driver_data(ctx);
6791 gen8_avc_mbenc_curbe_data *cmd;
6792 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6793 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6794 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6796 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6797 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6798 VASurfaceID surface_id;
6799 struct object_surface *obj_surface;
6801 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
6802 unsigned char qp = 0;
6803 unsigned char me_method = 0;
6804 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
6805 unsigned int table_idx = 0;
6806 unsigned int curbe_size = 0;
6808 unsigned int preset = generic_state->preset;
/* The CURBE layout written below is gen8-specific; map it only there. */
6809 if (IS_GEN8(i965->intel.device_info)) {
6810 cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6813 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6814 memset(cmd, 0, curbe_size);
/* Seed the CURBE from a per-frame-type init table; I-frame distortion
 * mode (used by BRC) has its own dedicated table. */
6816 if (mbenc_i_frame_dist_in_use) {
6817 memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6819 switch (generic_state->frame_type) {
6821 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6824 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6827 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
/* ME method per preset (B frames have their own table) and the frame's
 * effective slice QP. */
6839 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
6840 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6842 cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6843 cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6844 cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6845 cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6847 cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6848 cmd->dw38.max_len_sp = 0;
/* Frame-only encode: src/ref access are frame (0). */
6850 cmd->dw3.src_access = 0;
6851 cmd->dw3.ref_access = 0;
/* FTQ (fast transform quantization / FTQ-based skip) only applies to
 * inter frames; threshold choice differs for P vs B. */
6853 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6854 //disable ftq_override by now.
6855 if (avc_state->ftq_override) {
6856 cmd->dw3.ftq_enable = avc_state->ftq_enable;
6859 if (generic_state->frame_type == SLICE_TYPE_P) {
6860 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6863 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6867 cmd->dw3.ftq_enable = 0;
6870 if (avc_state->disable_sub_mb_partion)
6871 cmd->dw3.sub_mb_part_mask = 0x7;
/* Picture geometry: I-dist mode runs on the 4x-downscaled picture. */
6873 if (mbenc_i_frame_dist_in_use) {
6874 cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6875 cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6876 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6877 cmd->dw6.batch_buffer_end = 0;
6878 cmd->dw31.intra_compute_type = 1;
6880 cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6881 cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6882 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
/* Mode/MV cost table: 8 DWs selected by slice type and QP. */
6885 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
/* NOTE(review): the I-frame/old-mode-cost branch body appears to be
 * elided from this listing — confirm against the full source. */
6886 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6887 } else if (avc_state->skip_bias_adjustment_enable) {
6888 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6889 // No need to check for P picture as the flag is only enabled for P picture */
6890 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* VME search path: 16 DWs chosen by slice type (B uses table 1). */
6893 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6894 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6896 cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6897 cmd->dw4.field_parity_flag = 0;//bottom field
6898 cmd->dw4.enable_cur_fld_idr = 0;//field realted
6899 cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6900 cmd->dw4.hme_enable = generic_state->hme_enabled;
6901 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6902 cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
/* When 8x8 transform is off, mask out the intra 8x8 partition (bit 1). */
6904 cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6905 cmd->dw7.src_field_polarity = 0;//field related
6907 /*ftq_skip_threshold_lut set,dw14 /15*/
6909 /*r5 disable NonFTQSkipThresholdLUT*/
/* Skip value LUT indexed by block-based-skip, 8x8 transform and QP. */
6910 if (generic_state->frame_type == SLICE_TYPE_P) {
6911 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6912 } else if (generic_state->frame_type == SLICE_TYPE_B) {
6913 cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6916 cmd->dw13.qp_prime_y = qp;
6917 cmd->dw13.qp_prime_cb = qp;
6918 cmd->dw13.qp_prime_cr = qp;
6919 cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
/* Multi-reference prediction: 128 disables the extra L0/L1 predictors,
 * 1 enables them; selection depends on the preset's multi-pred mode. */
6921 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6922 switch (gen9_avc_multi_pred[preset]) {
6924 cmd->dw32.mult_pred_l0_disable = 128;
6925 cmd->dw32.mult_pred_l1_disable = 128;
6928 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6929 cmd->dw32.mult_pred_l1_disable = 128;
6932 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6933 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6936 cmd->dw32.mult_pred_l0_disable = 1;
6937 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6942 cmd->dw32.mult_pred_l0_disable = 128;
6943 cmd->dw32.mult_pred_l1_disable = 128;
/* B-slice specifics (frame-only encode, so field parity is 0). */
6946 if (generic_state->frame_type == SLICE_TYPE_B) {
6947 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
/* NOTE(review): same field assigned twice — the second assignment is
 * presumably meant for list1_ref_id1_frm_field_parity; confirm. */
6948 cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6949 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6952 cmd->dw34.b_original_bff = 0; //frame only
6953 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6954 cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6955 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6956 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6957 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6958 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6960 if (cmd->dw34.force_non_skip_check) {
6961 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6964 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6965 cmd->dw38.ref_threshold = 400;
6966 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
/* mb_qp_read_factor 0 means the kernel reads per-MB QP from the MBQP
 * surface; 2 means it uses the frame QP. */
6967 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
/* I-frame-distortion mode zeroes QP and intra non-DC penalties. */
6969 if (mbenc_i_frame_dist_in_use) {
6970 cmd->dw13.qp_prime_y = 0;
6971 cmd->dw13.qp_prime_cb = 0;
6972 cmd->dw13.qp_prime_cr = 0;
6973 cmd->dw33.intra_16x16_nondc_penalty = 0;
6974 cmd->dw33.intra_8x8_nondc_penalty = 0;
6975 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Per-reference actual QPs for up to 8 L0 and 2 L1 references. */
6977 if (cmd->dw4.use_actual_ref_qp_value) {
6978 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6979 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6980 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6981 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6982 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6983 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6984 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6985 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6986 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6987 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6990 table_idx = slice_type_kernel[generic_state->frame_type];
6991 cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
/* Per-slice-type overrides: I disables skip/HME-combine, P and B set
 * search window, MV limits and bias adjustments. */
6992 if (generic_state->frame_type == SLICE_TYPE_I) {
6993 cmd->dw0.skip_mode_enable = 0;
6994 cmd->dw37.skip_mode_enable = 0;
6995 cmd->dw36.hme_combine_overlap = 0;
6996 cmd->dw47.intra_cost_sf = 16;
6997 cmd->dw34.enable_direct_bias_adjustment = 0;
6998 cmd->dw34.enable_global_motion_bias_adjustment = 0;
7000 } else if (generic_state->frame_type == SLICE_TYPE_P) {
7001 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7002 cmd->dw3.bme_disable_fbr = 1;
7003 cmd->dw5.ref_width = gen9_avc_search_x[preset];
7004 cmd->dw5.ref_height = gen9_avc_search_y[preset];
7005 cmd->dw7.non_skip_zmv_added = 1;
7006 cmd->dw7.non_skip_mode_added = 1;
7007 cmd->dw7.skip_center_mask = 1;
7008 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7009 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
7010 cmd->dw36.hme_combine_overlap = 1;
7011 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7012 cmd->dw39.ref_width = gen9_avc_search_x[preset];
7013 cmd->dw39.ref_height = gen9_avc_search_y[preset];
7014 cmd->dw34.enable_direct_bias_adjustment = 0;
7015 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7016 if (avc_state->global_motion_bias_adjustment_enable)
7017 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
7019 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7020 cmd->dw1.bi_weight = avc_state->bi_weight;
7021 cmd->dw3.search_ctrl = 7;
7022 cmd->dw3.skip_type = 1;
7023 cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
7024 cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
7025 cmd->dw7.skip_center_mask = 0xff;
7026 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7027 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7028 cmd->dw36.hme_combine_overlap = 1;
/* Validate the first backward (L1) reference for B slices. */
7029 surface_id = slice_param->RefPicList1[0].picture_id;
7030 obj_surface = SURFACE(surface_id);
7032 WARN_ONCE("Invalid backward reference frame\n");
7035 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
7036 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7037 cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
7038 cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
7039 cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
/* Temporal distance scale factors for direct-mode MV derivation. */
7040 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
7041 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
7042 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
7043 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
7044 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
7045 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
7046 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
7047 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
7048 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
7049 if (cmd->dw34.enable_direct_bias_adjustment) {
7050 cmd->dw7.non_skip_zmv_added = 1;
7051 cmd->dw7.non_skip_mode_added = 1;
7054 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7055 if (avc_state->global_motion_bias_adjustment_enable)
7056 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* Remember the effective block-based-skip setting for the PAK side. */
7058 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
/* Rolling intra refresh: disabled under BRC (by now), otherwise pass
 * the refresh unit/QP delta straight through (see comment below). */
7060 if (avc_state->rolling_intra_refresh_enable) {
7061 /*by now disable it*/
7062 if (generic_state->brc_enabled) {
7063 cmd->dw4.enable_intra_refresh = false;
7064 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7065 cmd->dw48.widi_intra_refresh_mbx = 0;
7066 cmd->dw58.widi_intra_refresh_mby = 0;
7068 cmd->dw4.enable_intra_refresh = true;
7069 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7071 cmd->dw32.mult_pred_l0_disable = 128;
7072 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
7073 across one P frame to another P frame, as needed by the RollingI algo */
7074 cmd->dw48.widi_intra_refresh_mbx = 0;
7075 cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
7076 cmd->dw48.widi_intra_refresh_qp_delta = 0;
7079 cmd->dw34.widi_intra_refresh_en = 0;
7082 /*roi set disable by now. 49-56*/
/* Up to four ROI rectangles; QP deltas are only programmed when BRC is
 * off (with BRC the kernel gets them via the BRC path instead). */
7083 if (curbe_param->roi_enabled) {
7084 cmd->dw49.roi_1_x_left = generic_state->roi[0].left;
7085 cmd->dw49.roi_1_y_top = generic_state->roi[0].top;
7086 cmd->dw50.roi_1_x_right = generic_state->roi[0].right;
7087 cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
7089 cmd->dw51.roi_2_x_left = generic_state->roi[1].left;
7090 cmd->dw51.roi_2_y_top = generic_state->roi[1].top;
7091 cmd->dw52.roi_2_x_right = generic_state->roi[1].right;
7092 cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
7094 cmd->dw53.roi_3_x_left = generic_state->roi[2].left;
7095 cmd->dw53.roi_3_y_top = generic_state->roi[2].top;
7096 cmd->dw54.roi_3_x_right = generic_state->roi[2].right;
7097 cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
7099 cmd->dw55.roi_4_x_left = generic_state->roi[3].left;
7100 cmd->dw55.roi_4_y_top = generic_state->roi[3].top;
7101 cmd->dw56.roi_4_x_right = generic_state->roi[3].right;
7102 cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
7104 cmd->dw36.enable_cabac_work_around = 0;
7106 if (!generic_state->brc_enabled) {
/* Clamp each ROI QP delta so qp+delta stays within [0, AVC_QP_MAX]. */
7108 tmp = generic_state->roi[0].value;
7109 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7110 cmd->dw57.roi_1_dqp_prime_y = tmp;
7111 tmp = generic_state->roi[1].value;
7112 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7113 cmd->dw57.roi_2_dqp_prime_y = tmp;
7114 tmp = generic_state->roi[2].value;
7115 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7116 cmd->dw57.roi_3_dqp_prime_y = tmp;
7117 tmp = generic_state->roi[3].value;
7118 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7119 cmd->dw57.roi_4_dqp_prime_y = tmp;
7121 cmd->dw34.roi_enable_flag = 0;
/* Binding-table indices consumed by the gen8 MBENC kernel. */
7125 cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
7126 cmd->dw66.mv_data_surf_index = GEN8_AVC_MBENC_IND_MV_DATA_CM;
7127 cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
7128 cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
7129 cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
7130 cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
7131 cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
7132 cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
7133 cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
7134 cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
7135 cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
7136 cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
7137 cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
7138 cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
7139 cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
7140 cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
7141 cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
7142 cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
7143 cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
7144 cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
7145 cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
7146 cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
7148 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen8_avc_set_curbe_scaling4x:
 * Program the CURBE for the Gen8 4x downscaling kernel: input frame
 * dimensions, input/output binding-table indices, and the optional
 * MB flatness-check / variance / pixel-average outputs.
 *
 * param - struct scaling_param describing the scaling pass.
 */
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155 struct encode_state *encode_state,
7156 struct i965_gpe_context *gpe_context,
7157 struct intel_encoder_context *encoder_context,
7160 gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161 struct scaling_param *surface_param = (struct scaling_param *)param;
7163 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* NOTE(review): clears sizeof(gen9_avc_scaling4x_curbe_data) while the
 * mapped CURBE is gen8_avc_scaling4x_curbe_data — if the gen9 struct is
 * larger than the gen8 CURBE allocation this over-writes; confirm the
 * two sizes (or that the gen8 sizeof was intended). */
7168 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7170 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
7171 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7173 curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174 curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
/* Flatness check: threshold 128 when enabled, 0 otherwise. */
7176 curbe_cmd->dw5.flatness_threshold = 0;
7177 if (surface_param->enable_mb_flatness_check) {
7178 curbe_cmd->dw5.flatness_threshold = 128;
7179 curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7182 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
/* MBV stats surfaces are only bound when variance or pixel-average
 * output is requested. */
7186 if (curbe_cmd->dw6.enable_mb_variance_output ||
7187 curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188 curbe_cmd->dw10.mbv_proc_states_bti_top_field = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189 curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7192 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen8_avc_set_curbe_me:
 * Program the CURBE for the Gen8 HME (hierarchical motion estimation)
 * kernel for one HME level (4x, 16x or 32x).  Each level decides
 * whether to seed from the previous (coarser) level's MVs, whether to
 * write distortion output, and the MV shift / read-position factors.
 *
 * param - struct me_param selecting the HME level (hme_type).
 */
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198 struct encode_state *encode_state,
7199 struct i965_gpe_context *gpe_context,
7200 struct intel_encoder_context *encoder_context,
7203 gen8_avc_me_curbe_data *curbe_cmd;
7204 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7208 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7210 struct me_param * curbe_param = (struct me_param *)param ;
7211 unsigned char use_mv_from_prev_step = 0;
7212 unsigned char write_distortions = 0;
7213 unsigned char qp_prime_y = 0;
7214 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215 unsigned char seach_table_idx = 0;
7216 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218 unsigned int scale_factor = 0;
7220 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Per-level setup: coarser levels feed MVs to finer ones; only the 4x
 * level writes distortion.  NOTE(review): the per-case scale_factor
 * assignments (4/16/32) appear to be elided from this listing —
 * confirm scale_factor is set before the divisions below. */
7221 switch (curbe_param->hme_type) {
7222 case INTEL_ENC_HME_4x : {
7223 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224 write_distortions = 1;
7225 mv_shift_factor = 2;
7227 prev_mv_read_pos_factor = 0;
7230 case INTEL_ENC_HME_16x : {
7231 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232 write_distortions = 0;
7233 mv_shift_factor = 2;
7235 prev_mv_read_pos_factor = 1;
7238 case INTEL_ENC_HME_32x : {
7239 use_mv_from_prev_step = 0;
7240 write_distortions = 0;
7241 mv_shift_factor = 1;
7243 prev_mv_read_pos_factor = 0;
7250 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* Dimensions of this HME level's downscaled picture, in macroblocks. */
7255 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7258 memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7260 curbe_cmd->dw3.sub_pel_mode = 3;
/* Frame-only encode: field access/polarity are forced to frame (0). */
7261 if (avc_state->field_scaling_output_interleaved) {
7262 /*frame set to zero,field specified*/
7263 curbe_cmd->dw3.src_access = 0;
7264 curbe_cmd->dw3.ref_access = 0;
7265 curbe_cmd->dw7.src_field_polarity = 0;
7267 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7271 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272 curbe_cmd->dw6.write_distortions = write_distortions;
7273 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames use bidirectional weight and the B ME method table. */
7276 if (generic_state->frame_type == SLICE_TYPE_B) {
7277 curbe_cmd->dw1.bi_weight = 32;
7278 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279 me_method = gen9_avc_b_me_method[generic_state->preset];
7280 seach_table_idx = 1;
7283 if (generic_state->frame_type == SLICE_TYPE_P ||
7284 generic_state->frame_type == SLICE_TYPE_B)
7285 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7287 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* VME search path: 14 DWs for this ME method / slice-type table. */
7290 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table indices consumed by the gen8 ME kernel. */
7292 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298 curbe_cmd->dw38.reserved = 0;
7300 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen8_avc_set_curbe_brc_frame_update:
 * Program the CURBE for the Gen8 BRC frame-update kernel: maintains the
 * virtual-buffer fullness model across frames, handles skipped frames,
 * applies per-frame-type min/max QP limits, and (for AVBR) the growth
 * adjustment frame thresholds and rate-ratio bands.
 *
 * Requires a reconstructed surface with private data (the flag telling
 * whether this frame is used as a reference feeds dw5.brc_flag);
 * bails out early otherwise.
 */
7305 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7306 struct encode_state *encode_state,
7307 struct i965_gpe_context *gpe_context,
7308 struct intel_encoder_context *encoder_context,
7311 gen8_avc_frame_brc_update_curbe_data *cmd;
7312 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7313 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7314 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7315 struct object_surface *obj_surface;
7316 struct gen9_surface_avc *avc_priv_surface;
7317 struct avc_param common_param;
7319 obj_surface = encode_state->reconstructed_object;
/* The per-surface AVC private data is required below. */
7321 if (!obj_surface || !obj_surface->private_data)
7323 avc_priv_surface = obj_surface->private_data;
7325 cmd = i965_gpe_context_map_curbe(gpe_context);
7330 memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
/* Wrap the modeled buffer fullness when it exceeds the buffer size and
 * tell the kernel via target_size_flag. */
7332 cmd->dw5.target_size_flag = 0 ;
7333 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7335 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7336 cmd->dw5.target_size_flag = 1 ;
/* Account for frames skipped by the app since the last update.
 * (Field name "skip_frame_enbale" is misspelled in the state struct.) */
7339 if (generic_state->skip_frame_enbale) {
7340 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7341 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
7343 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7346 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7347 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes converted to bits (<< 3); "herder" is the struct's own
 * (misspelled) field name. */
7348 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7349 cmd->dw5.cur_frame_type = generic_state->frame_type ;
7350 cmd->dw5.brc_flag = 0 ;
7351 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7353 if (avc_state->multi_pre_enable) {
7354 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7355 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7358 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Per-frame-type QP clamps; zero disables clamping in the kernel. */
7359 if (avc_state->min_max_qp_enable) {
7360 switch (generic_state->frame_type) {
7362 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7363 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7366 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7367 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7370 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7371 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7375 cmd->dw6.minimum_qp = 0 ;
7376 cmd->dw6.maximum_qp = 0 ;
/* Advance the fullness model by one frame's worth of input bits. */
7379 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR: growth-adjust start frames scaled by convergence, and the six
 * rate-ratio thresholds widened/narrowed by the accuracy setting. */
7381 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7382 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
7383 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
7384 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
7385 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
7386 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7387 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7388 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7389 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7390 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7391 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
/* NOTE(review): common_param is populated but its consumer is not
 * visible in this excerpt — presumably passed to a helper in elided
 * lines; confirm before removing. */
7395 memset(&common_param, 0, sizeof(common_param));
7396 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7397 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7398 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7399 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7400 common_param.frames_per_100s = generic_state->frames_per_100s;
7401 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7402 common_param.target_bit_rate = generic_state->target_bit_rate;
7404 i965_gpe_context_unmap_curbe(gpe_context);
7410 kernel related functions: init/destroy etc.
/*
 * Set up the GPE contexts and load the kernels for the AVC down-scaling
 * stage: the 4x scaling kernel is configured for every platform; the 2x
 * scaling kernel is additionally set up (per the in-code comment, PreEnc
 * uses only the 4x path).
 * NOTE(review): this extract is elided -- the return-type line, the last
 * parameter(s), several closing braces and the trailing arguments of the
 * get_kernel_header_and_size()/load_kernels() calls are not visible here.
 */
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414 struct generic_encoder_context *generic_context,
7415 struct gen_avc_scaling_context *kernel_context,
7418 struct i965_driver_data *i965 = i965_driver_data(ctx);
7419 struct i965_gpe_table *gpe = &i965->gpe_table;
7420 struct i965_gpe_context *gpe_context = NULL;
7421 struct encoder_kernel_parameter kernel_param ;
7422 struct encoder_scoreboard_parameter scoreboard_param;
7423 struct i965_kernel common_kernel;
/* Pick the 4x-scaling CURBE/inline-data sizes per device generation. */
7425 memset(&kernel_param, 0, sizeof(kernel_param));
7426 if (IS_SKL(i965->intel.device_info) ||
7427 IS_BXT(i965->intel.device_info)) {
7428 if (!preenc_enabled) {
7429 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7432 /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7436 } else if (IS_KBL(i965->intel.device_info) ||
7437 IS_GEN10(i965->intel.device_info) ||
7438 IS_GLK(i965->intel.device_info)) {
7439 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441 } else if (IS_GEN8(i965->intel.device_info)) {
7442 kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443 kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7447 /* 4x scaling kernel*/
7448 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7450 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451 scoreboard_param.mask = 0xFF;
7452 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454 scoreboard_param.walkpat_flag = 0;
/* Create + configure the GPE context for the 4x scaling kernel, then load it. */
7456 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7460 memset(&common_kernel, 0, sizeof(common_kernel));
7462 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463 generic_context->enc_kernel_size,
7464 INTEL_GENERIC_ENC_SCALING4X,
7468 gpe->load_kernels(ctx,
7473 /* PreEnc using only the 4X scaling */
7477 /*2x scaling kernel*/
7478 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479 kernel_param.inline_data_size = 0;
7480 kernel_param.sampler_size = 0;
/* Create + configure the GPE context for the 2x scaling kernel, then load it. */
7482 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7486 memset(&common_kernel, 0, sizeof(common_kernel));
7488 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489 generic_context->enc_kernel_size,
7490 INTEL_GENERIC_ENC_SCALING2X,
7494 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts and load the kernels for the AVC HME (motion
 * estimation) stage.  The CURBE size depends on the device generation and
 * on whether PreEnc/FEI is enabled; two contexts are initialized (the
 * in-code comment says: one for P frames, one for B frames).
 * NOTE(review): elided extract -- return type, trailing parameters,
 * closing braces and the tail of the load_kernels() call are not visible.
 */
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503 struct generic_encoder_context *generic_context,
7504 struct gen_avc_me_context *kernel_context,
7507 struct i965_driver_data *i965 = i965_driver_data(ctx);
7508 struct i965_gpe_table *gpe = &i965->gpe_table;
7509 struct i965_gpe_context *gpe_context = NULL;
7510 struct encoder_kernel_parameter kernel_param ;
7511 struct encoder_scoreboard_parameter scoreboard_param;
7512 struct i965_kernel common_kernel;
7514 unsigned int curbe_size = 0;
/* Select the ME CURBE layout: GEN8, plain GEN9, or FEI variant. */
7516 if (IS_GEN8(i965->intel.device_info)) {
7517 curbe_size = sizeof(gen8_avc_me_curbe_data);
7519 if (!preenc_enabled)
7520 curbe_size = sizeof(gen9_avc_me_curbe_data);
7522 curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7525 kernel_param.curbe_size = curbe_size;
7526 kernel_param.inline_data_size = 0;
7527 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7529 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530 scoreboard_param.mask = 0xFF;
7531 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533 scoreboard_param.walkpat_flag = 0;
7535 /* There is two hme kernel, one for P and other for B frame */
7536 for (i = 0; i < 2; i++) {
7537 gpe_context = &kernel_context->gpe_contexts[i];
7538 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7541 memset(&common_kernel, 0, sizeof(common_kernel));
7543 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544 generic_context->enc_kernel_size,
7545 INTEL_GENERIC_ENC_ME,
7549 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the kernel for the FEI PreProc
 * stage (uses the FEI kernel-header lookup helper).
 * NOTE(review): elided extract -- return type, closing braces and the
 * tails of the helper/load_kernels() calls are not visible here.
 */
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559 struct generic_encoder_context *generic_context,
7560 struct gen_avc_preproc_context *kernel_context)
7562 struct i965_driver_data *i965 = i965_driver_data(ctx);
7563 struct i965_gpe_table *gpe = &i965->gpe_table;
7564 struct i965_gpe_context *gpe_context = NULL;
7565 struct encoder_kernel_parameter kernel_param ;
7566 struct encoder_scoreboard_parameter scoreboard_param;
7567 struct i965_kernel common_kernel;
7569 kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570 kernel_param.inline_data_size = 0;
7571 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7573 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574 scoreboard_param.mask = 0xFF;
7575 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577 scoreboard_param.walkpat_flag = 0;
/* PreProc keeps a single GPE context (not an array index). */
7579 gpe_context = &kernel_context->gpe_contexts;
7580 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7583 memset(&common_kernel, 0, sizeof(common_kernel));
7585 intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586 generic_context->enc_kernel_size,
7587 INTEL_GENERIC_ENC_PREPROC,
7591 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts and load the kernels for the AVC MBEnc stage.
 * The CURBE layout and the number of MBEnc kernels depend on the device
 * generation and (on SKL/BXT) on whether the FEI variant is in use.
 * NOTE(review): elided extract -- return type, trailing parameters,
 * closing braces and the tail of the load_kernels() call are not visible.
 */
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600 struct generic_encoder_context *generic_context,
7601 struct gen_avc_mbenc_context *kernel_context,
7604 struct i965_driver_data *i965 = i965_driver_data(ctx);
7605 struct i965_gpe_table *gpe = &i965->gpe_table;
7606 struct i965_gpe_context *gpe_context = NULL;
7607 struct encoder_kernel_parameter kernel_param ;
7608 struct encoder_scoreboard_parameter scoreboard_param;
7609 struct i965_kernel common_kernel;
7611 unsigned int curbe_size = 0;
7612 unsigned int num_mbenc_kernels = 0;
/* Per-generation CURBE size and MBEnc kernel count selection. */
7614 if (IS_SKL(i965->intel.device_info) ||
7615 IS_BXT(i965->intel.device_info)) {
7617 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7620 curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621 num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7623 } else if (IS_KBL(i965->intel.device_info) ||
7624 IS_GEN10(i965->intel.device_info) ||
7625 IS_GLK(i965->intel.device_info)) {
7626 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628 } else if (IS_GEN8(i965->intel.device_info)) {
7629 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
/* A zero curbe_size here would mean an unsupported platform. */
7633 assert(curbe_size > 0);
7634 kernel_param.curbe_size = curbe_size;
7635 kernel_param.inline_data_size = 0;
7636 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7638 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639 scoreboard_param.mask = 0xFF;
7640 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642 scoreboard_param.walkpat_flag = 0;
/* Initialize one GPE context per MBEnc kernel and load each kernel. */
7644 for (i = 0; i < num_mbenc_kernels ; i++) {
7645 gpe_context = &kernel_context->gpe_contexts[i];
7646 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7649 memset(&common_kernel, 0, sizeof(common_kernel));
7651 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652 generic_context->enc_kernel_size,
7653 INTEL_GENERIC_ENC_MBENC,
7657 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts and load the kernels for the BRC (bit-rate
 * control) stage.  GEN8 has one fewer BRC kernel than GEN9+; per-kernel
 * CURBE sizes come from a per-generation lookup table.
 * NOTE(review): elided extract -- return type, closing braces and the
 * tail of the load_kernels() call are not visible here.
 */
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667 struct generic_encoder_context *generic_context,
7668 struct gen_avc_brc_context *kernel_context)
7670 struct i965_driver_data *i965 = i965_driver_data(ctx);
7671 struct i965_gpe_table *gpe = &i965->gpe_table;
7672 struct i965_gpe_context *gpe_context = NULL;
7673 struct encoder_kernel_parameter kernel_param ;
7674 struct encoder_scoreboard_parameter scoreboard_param;
7675 struct i965_kernel common_kernel;
7676 int num_brc_init_kernels = 0;
/* GEN8 loads one fewer BRC kernel than GEN9 and later. */
7679 if (IS_GEN8(i965->intel.device_info)) {
7680 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7682 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
/* Per-kernel CURBE sizes, indexed in BRC kernel order (GEN8 table). */
7685 const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687 (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689 (sizeof(gen8_avc_mbenc_curbe_data)),
/* Per-kernel CURBE sizes for GEN9+; the MBEnc entry depends on whether
 * the platform is SKL/BXT (gen9 layout) or newer (gen95 layout). */
7692 const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696 ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7698 (sizeof(gen9_avc_mb_brc_curbe_data))
7701 kernel_param.inline_data_size = 0;
7702 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7704 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705 scoreboard_param.mask = 0xFF;
7706 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708 scoreboard_param.walkpat_flag = 0;
/* One GPE context per BRC kernel, each with its own CURBE size. */
7710 for (i = 0; i < num_brc_init_kernels; i++) {
7711 if (IS_GEN8(i965->intel.device_info)) {
7712 kernel_param.curbe_size = gen8_brc_curbe_size[i];
7714 kernel_param.curbe_size = gen9_brc_curbe_size[i];
7716 gpe_context = &kernel_context->gpe_contexts[i];
7717 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7720 memset(&common_kernel, 0, sizeof(common_kernel));
7722 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723 generic_context->enc_kernel_size,
7724 INTEL_GENERIC_ENC_BRC,
7728 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the kernel for the weighted
 * prediction (WP) stage.
 * NOTE(review): elided extract -- return type, closing braces and the
 * tails of the helper/load_kernels() calls are not visible here.
 */
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738 struct generic_encoder_context *generic_context,
7739 struct gen_avc_wp_context *kernel_context)
7741 struct i965_driver_data *i965 = i965_driver_data(ctx);
7742 struct i965_gpe_table *gpe = &i965->gpe_table;
7743 struct i965_gpe_context *gpe_context = NULL;
7744 struct encoder_kernel_parameter kernel_param ;
7745 struct encoder_scoreboard_parameter scoreboard_param;
7746 struct i965_kernel common_kernel;
7748 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749 kernel_param.inline_data_size = 0;
7750 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7752 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753 scoreboard_param.mask = 0xFF;
7754 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756 scoreboard_param.walkpat_flag = 0;
/* WP keeps a single GPE context (not an array index). */
7758 gpe_context = &kernel_context->gpe_contexts;
7759 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7762 memset(&common_kernel, 0, sizeof(common_kernel));
7764 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765 generic_context->enc_kernel_size,
7766 INTEL_GENERIC_ENC_WP,
7770 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context and load the kernel for the SFD (static
 * frame detection) stage.
 * NOTE(review): elided extract -- return type, closing braces and the
 * tails of the helper/load_kernels() calls are not visible here.
 */
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779 struct generic_encoder_context *generic_context,
7780 struct gen_avc_sfd_context *kernel_context)
7782 struct i965_driver_data *i965 = i965_driver_data(ctx);
7783 struct i965_gpe_table *gpe = &i965->gpe_table;
7784 struct i965_gpe_context *gpe_context = NULL;
7785 struct encoder_kernel_parameter kernel_param ;
7786 struct encoder_scoreboard_parameter scoreboard_param;
7787 struct i965_kernel common_kernel;
7789 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790 kernel_param.inline_data_size = 0;
7791 kernel_param.sampler_size = 0;
/* Common VFE scoreboard configuration (no special walk pattern). */
7793 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794 scoreboard_param.mask = 0xFF;
7795 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797 scoreboard_param.walkpat_flag = 0;
/* SFD keeps a single GPE context (not an array index). */
7799 gpe_context = &kernel_context->gpe_contexts;
7800 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7803 memset(&common_kernel, 0, sizeof(common_kernel));
7805 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806 generic_context->enc_kernel_size,
7807 INTEL_GENERIC_ENC_SFD,
7811 gpe->load_kernels(ctx,
/*
 * Tear down everything the kernel-init functions above created: free the
 * shared encoder resources, then destroy every GPE context for each
 * kernel stage (scaling, BRC, ME, MBEnc, WP, SFD, PreProc).
 * NOTE(review): elided extract -- closing braces and some declarations
 * (e.g. of the loop index) are not visible here.
 */
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7822 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824 struct i965_gpe_table *gpe = &i965->gpe_table;
7828 gen9_avc_free_resources(vme_context);
/* Array-backed stages: destroy every per-kernel GPE context. */
7830 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7833 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7836 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7839 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
/* Single-context stages. */
7842 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7844 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7846 gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
/*
 * Refresh the per-frame encoder state from the application-supplied
 * sequence/picture/slice parameters: collect slice params, derive the
 * frame type, (re)initialize BRC state, compute the frame dimensions and
 * the 4x/16x/32x downscaled sizes, and enable/disable HME levels --
 * clamping the downscaled sizes to the VME minimum where needed.
 * NOTE(review): elided extract -- the return type, some condition lines,
 * closing braces and `else` lines are missing from view.
 */
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7856 struct encode_state *encode_state,
7857 struct intel_encoder_context *encoder_context)
7859 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862 VAEncSequenceParameterBufferH264 *seq_param;
7863 VAEncSliceParameterBufferH264 *slice_param;
7864 VAEncMiscParameterBuffer *fei_misc_param;
7865 int i, j, slice_index;
7866 unsigned int preset = generic_state->preset;
7867 unsigned int fei_enabled = encoder_context->fei_enabled;
7869 /* seq/pic/slice parameter setting */
/* HME support levels are looked up per quality preset. */
7870 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7873 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Pick up the optional FEI frame-control misc parameter when present. */
7877 encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878 fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879 avc_state->fei_framectl_param =
7880 (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
/* Flatten all slice parameter buffers into avc_state->slice_param[]. */
7883 avc_state->slice_num = 0;
7885 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888 avc_state->slice_param[slice_index] = slice_param;
7891 avc_state->slice_num++;
7895 /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896 seq_param = avc_state->seq_param;
7897 slice_param = avc_state->slice_param[0];
/* Derive the frame type from the first slice's slice_type (SI maps to I). */
7899 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7901 if (slice_param->slice_type == SLICE_TYPE_I ||
7902 slice_param->slice_type == SLICE_TYPE_SI)
7903 generic_state->frame_type = SLICE_TYPE_I;
7904 else if (slice_param->slice_type == SLICE_TYPE_P)
7905 generic_state->frame_type = SLICE_TYPE_P;
7906 else if (slice_param->slice_type == SLICE_TYPE_B)
7907 generic_state->frame_type = SLICE_TYPE_B;
/* NOTE(review): both branches force 8x8-transform off (see the in-line
 * workaround comment) -- looks intentional but worth confirming. */
7908 if (profile == VAProfileH264High)
7909 avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7911 avc_state->transform_8x8_mode_enable = 0;
/* (Re)seed BRC rate parameters from the sequence parameters. */
7914 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918 generic_state->frames_per_100s = 3000; /* 30fps */
7921 generic_state->gop_size = seq_param->intra_period;
7922 generic_state->gop_ref_distance = seq_param->ip_period;
/* CBR pins min/max bit rate to the target. */
7924 if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925 generic_state->max_bit_rate = generic_state->target_bit_rate;
7926 generic_state->min_bit_rate = generic_state->target_bit_rate;
7929 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
/* Resolve the preset (fall back to real-time speed) and kernel mode. */
7933 generic_state->preset = encoder_context->quality_level;
7934 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935 generic_state->preset = INTEL_PRESET_RT_SPEED;
7937 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
/* First-time BRC initialization of per-frame budget/buffer state. */
7939 if (!generic_state->brc_inited) {
7940 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7947 generic_state->curr_pak_pass = 0;
7948 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* BRC is only active for CBR/VBR. */
7950 if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951 generic_state->internal_rate_mode == VA_RC_VBR)
7952 generic_state->brc_enabled = 1;
7954 generic_state->brc_enabled = 0;
/* BRC needs a complete set of rate parameters; otherwise disable it. */
7956 if (generic_state->brc_enabled &&
7957 (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958 !generic_state->vbv_buffer_size_in_bit ||
7959 !generic_state->max_bit_rate ||
7960 !generic_state->target_bit_rate ||
7961 !generic_state->frames_per_100s)) {
7962 WARN_ONCE("Rate control parameter is required for BRC\n");
7963 generic_state->brc_enabled = 0;
/* Without BRC: clear rate state and use a single PAK pass. */
7966 if (!generic_state->brc_enabled) {
7967 generic_state->target_bit_rate = 0;
7968 generic_state->max_bit_rate = 0;
7969 generic_state->min_bit_rate = 0;
7970 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971 generic_state->vbv_buffer_size_in_bit = 0;
7972 generic_state->num_pak_passes = 1;
7974 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Frame geometry in MBs and pixels (16 px per macroblock). */
7978 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
/* Downscaled sizes for the 4x/16x/32x HME levels, 16-aligned. */
7983 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
7986 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7988 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
7991 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7993 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
7996 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
/* Enable each HME level only where supported. */
7998 if (generic_state->hme_supported) {
7999 generic_state->hme_enabled = 1;
8001 generic_state->hme_enabled = 0;
8004 if (generic_state->b16xme_supported) {
8005 generic_state->b16xme_enabled = 1;
8007 generic_state->b16xme_enabled = 0;
8010 if (generic_state->b32xme_supported) {
8011 generic_state->b32xme_enabled = 1;
8013 generic_state->b32xme_enabled = 0;
8015 /* disable HME/16xME if the size is too small */
8016 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017 generic_state->b32xme_supported = 0;
8018 generic_state->b32xme_enabled = 0;
8019 generic_state->b16xme_supported = 0;
8020 generic_state->b16xme_enabled = 0;
8021 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8024 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025 generic_state->b32xme_supported = 0;
8026 generic_state->b32xme_enabled = 0;
8027 generic_state->b16xme_supported = 0;
8028 generic_state->b16xme_enabled = 0;
8029 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/* 16x too small: drop only the 32x level and clamp the 16x size. */
8033 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034 generic_state->b32xme_supported = 0;
8035 generic_state->b32xme_enabled = 0;
8036 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8039 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040 generic_state->b32xme_supported = 0;
8041 generic_state->b32xme_enabled = 0;
8042 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/* 32x too small: just clamp the 32x size. */
8046 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8050 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and finalize the per-frame encoding decisions: map the VA-API
 * rate-control mode to the internal one, gate BRC/MB-BRC/ROI features,
 * decide CAF / flatness-check / MB-status / SFD enables, compute the
 * slice height, bi-prediction weights, and the inter rounding value.
 * NOTE(review): elided extract -- the return type line, several `case`
 * labels, `else` lines and closing braces are not visible here.
 */
8058 gen9_avc_encode_check_parameter(VADriverContextP ctx,
8059 struct encode_state *encode_state,
8060 struct intel_encoder_context *encoder_context)
8062 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8063 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8064 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8065 unsigned int rate_control_mode = encoder_context->rate_control_mode;
8066 unsigned int preset = generic_state->preset;
8067 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
8068 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Effective slice QP = picture init QP + the first slice's delta. */
8070 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8072 generic_state->avbr_curracy = 30;
8073 generic_state->avbr_convergence = 150;
/* Map the low 7 bits of the VA rate-control mode to the internal mode. */
8075 switch (rate_control_mode & 0x7f) {
8077 generic_state->internal_rate_mode = VA_RC_CBR;
8081 generic_state->internal_rate_mode = VA_RC_VBR;
8086 generic_state->internal_rate_mode = VA_RC_CQP;
/* Any real rate control (not NONE/CQP) turns BRC and its buffers on. */
8090 if (rate_control_mode != VA_RC_NONE &&
8091 rate_control_mode != VA_RC_CQP) {
8092 generic_state->brc_enabled = 1;
8093 generic_state->brc_distortion_buffer_supported = 1;
8094 generic_state->brc_constant_buffer_supported = 1;
8095 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
8098 /*check brc parameter*/
8099 if (generic_state->brc_enabled) {
8100 avc_state->mb_qp_data_enable = 0;
8103 /*set the brc init and reset accordingly*/
8104 if (generic_state->brc_need_reset &&
8105 (generic_state->brc_distortion_buffer_supported == 0 ||
8106 rate_control_mode == VA_RC_CQP)) {
8107 generic_state->brc_need_reset = 0;// not support by CQP
/* SFD is used only for non-BRC, non-I frames. */
8109 if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
8110 avc_state->sfd_enable = 0;
8112 avc_state->sfd_enable = 1;
/* Window size defaults to min(fps, 60) and is re-clamped when too big. */
8115 if (generic_state->frames_per_window_size == 0) {
8116 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8117 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
8118 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
/* BRC path: HME for non-I frames; min/max-QP forces one PAK pass;
 * ROI-based BRC implies MB-level BRC. */
8121 if (generic_state->brc_enabled) {
8122 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
8123 if (avc_state->min_max_qp_enable) {
8124 generic_state->num_pak_passes = 1;
8126 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
8127 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
8129 generic_state->num_pak_passes = 1;// CQP only one pass
/* I-frame distortion pass only when BRC + distortion buffer + I frame. */
8132 avc_state->mbenc_i_frame_dist_in_use = 0;
8133 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
8135 /*ROI must enable mbbrc.*/
/* CAF (all-fractional search) enablement by frame type and preset. */
8138 if (avc_state->caf_supported) {
8139 switch (generic_state->frame_type) {
8141 avc_state->caf_enable = 0;
8144 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
8147 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
/* Optionally disable CAF for HD (>= 1280x720) resolutions per preset. */
8151 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
8152 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
8153 avc_state->caf_enable = 0;
8157 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
8159 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
8160 if (avc_state->flatness_check_supported) {
8161 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
8163 avc_state->flatness_check_enable = 0;
8166 /* check mb_status_supported/enbale*/
8167 if (avc_state->adaptive_transform_decision_enable) {
8168 avc_state->mb_status_enable = 1;
8170 avc_state->mb_status_enable = 0;
8172 /*slice check,all the slices use the same slice height except the last slice*/
8173 avc_state->arbitrary_num_mbs_in_slice = 0;
8174 for (i = 0; i < avc_state->slice_num; i++) {
8175 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
8176 avc_state->arbitrary_num_mbs_in_slice = 1;
8177 avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
8179 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
/* I frames never use HME. */
8183 if (generic_state->frame_type == SLICE_TYPE_I) {
8184 generic_state->hme_enabled = 0;
8185 generic_state->b16xme_enabled = 0;
8186 generic_state->b32xme_enabled = 0;
/* B frames need the dist-scale factors and derived bi-weight. */
8189 if (generic_state->frame_type == SLICE_TYPE_B) {
8190 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
8191 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
8194 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
8195 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
8196 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
/* Quality kernel mode turns on trellis quantization and MB BRC. */
8198 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
8199 avc_state->tq_enable = 1;
8200 avc_state->tq_rounding = 6;
8201 if (generic_state->brc_enabled) {
8202 generic_state->mb_brc_enabled = 1;
8206 //check the inter rounding
/* NOTE(review): rounding_inter_* are set to 255 here and then compared
 * against AVC_INVALID_ROUNDING_VALUE below -- presumably 255 equals that
 * constant; confirm against the header. */
8207 avc_state->rounding_value = 0;
8208 avc_state->rounding_inter_p = 255;//default
8209 avc_state->rounding_inter_b = 255; //default
8210 avc_state->rounding_inter_b_ref = 255; //default
/* P frame: adaptive rounding by slice QP when no BRC, else preset table. */
8212 if (generic_state->frame_type == SLICE_TYPE_P) {
8213 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
8214 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
8215 if (generic_state->gop_ref_distance == 1)
8216 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
8218 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
8220 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
8224 avc_state->rounding_value = avc_state->rounding_inter_p;
/* B frame: distinguish reference vs non-reference B pictures. */
8226 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8227 if (pic_param->pic_fields.bits.reference_pic_flag) {
8228 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
8229 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
8231 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
8233 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
8234 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
8235 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
8237 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
8239 avc_state->rounding_value = avc_state->rounding_inter_b;
8243 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_gpe_kernel_prepare:
 * Per-frame resource binding done before the VME kernels are submitted.
 * Binds to GPE resources: the reconstructed surface and its direct-MV
 * (DMV) top/bottom buffers, the uncompressed input YUV surface, every
 * active reference surface (plus their DMV buffers), and the coded
 * (output bitstream) buffer.  Also resets the coded-buffer status
 * segment and derives the L0/L1 reference-index mapping the kernels use.
 * Returns VA_STATUS_SUCCESS, or the failing VA status on error.
 */
8247 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
8248 struct encode_state *encode_state,
8249 struct intel_encoder_context *encoder_context)
8252 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8253 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8254 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8255 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8256 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8258 struct object_surface *obj_surface;
8259 struct object_buffer *obj_buffer;
8260 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8261 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8262 struct i965_coded_buffer_segment *coded_buffer_segment;
8264 struct gen9_surface_avc *avc_priv_surface;
8266 struct avc_surface_param surface_param;
8268 unsigned char * pdata;
8270 /* Setup current reconstruct frame */
8271 obj_surface = encode_state->reconstructed_object;
8272 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8274 if (va_status != VA_STATUS_SUCCESS)
8277 memset(&surface_param, 0, sizeof(surface_param));
8278 surface_param.frame_width = generic_state->frame_width_in_pixel;
8279 surface_param.frame_height = generic_state->frame_height_in_pixel;
8280 va_status = gen9_avc_init_check_surfaces(ctx,
8284 if (va_status != VA_STATUS_SUCCESS)
8287 /* Initialize avc_priv_surface members (frame_store_id, qp_value, POCs).
8287  * The current frame's DMV buffers occupy the last two slots of
8287  * res_direct_mv_buffersr; slots [0 .. 2*num_refs) hold reference DMVs. */
8288 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
8289 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
8290 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
8291 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
8292 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
8293 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
8294 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
8295 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8296 avc_priv_surface->frame_store_id = 0;
8297 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
8298 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
8299 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
8300 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
8301 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
8303 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
8304 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8306 /* input YUV surface*/
8307 obj_surface = encode_state->input_yuv_object;
8308 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8310 if (va_status != VA_STATUS_SUCCESS)
8312 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
8313 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8315 /* Reference surfaces: rebind every slot; empty slots just get their
8315  * old resources released and POCs zeroed. */
8316 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
8317 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
8318 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
8319 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
8320 obj_surface = encode_state->reference_objects[i];
8321 avc_state->top_field_poc[2 * i] = 0;
8322 avc_state->top_field_poc[2 * i + 1] = 0;
8324 if (obj_surface && obj_surface->bo) {
8325 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
8327 /* actually it should be handled when it is reconstructed surface*/
8328 va_status = gen9_avc_init_check_surfaces(ctx,
8329 obj_surface, encoder_context,
8331 if (va_status != VA_STATUS_SUCCESS)
8333 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
8334 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
8335 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
8336 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
8337 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
8338 avc_priv_surface->frame_store_id = i;
8344 /* Encoded bitstream: payload starts after the coded-buffer header;
8344  * the end offset is clamped 0x1000 below the buffer size (page aligned). */
8345 obj_buffer = encode_state->coded_buf_object;
8346 bo = obj_buffer->buffer_store->bo;
8347 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
8348 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
8349 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
8350 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
8353 avc_ctx->status_buffer.bo = bo;
8355 /* set the internal flag to 0 to indicate the coded size is unknown */
8357 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
8358 coded_buffer_segment->mapped = 0;
8359 coded_buffer_segment->codec = encoder_context->codec;
8360 coded_buffer_segment->status_support = 1;
8362 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
8363 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
8366 /* Frame id: index of the ref pic in the reference_objects list.
8366  * Active ref counts come from the picture params, overridden by the
8366  * slice header when num_ref_idx_active_override_flag is set. */
8367 avc_state->num_refs[0] = 0;
8368 avc_state->num_refs[1] = 0;
8369 if (generic_state->frame_type == SLICE_TYPE_P) {
8370 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8372 if (slice_param->num_ref_idx_active_override_flag)
8373 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8374 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8375 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8376 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
8378 if (slice_param->num_ref_idx_active_override_flag) {
8379 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8380 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
8384 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
8385 return VA_STATUS_ERROR_INVALID_VALUE;
8386 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
8387 return VA_STATUS_ERROR_INVALID_VALUE;
8389 /* Map each RefPicList0 entry to its index in reference_objects. */
8389 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
8390 VAPictureH264 *va_pic;
8392 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
8393 avc_state->list_ref_idx[0][i] = 0;
8395 if (i >= avc_state->num_refs[0])
8398 va_pic = &slice_param->RefPicList0[i];
8400 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8401 obj_surface = encode_state->reference_objects[j];
8405 obj_surface->base.id == va_pic->picture_id) {
8407 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8408 avc_state->list_ref_idx[0][i] = j;
8414 /* Same mapping for RefPicList1 (B frames). */
8414 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
8415 VAPictureH264 *va_pic;
8417 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
8418 avc_state->list_ref_idx[1][i] = 0;
8420 if (i >= avc_state->num_refs[1])
8423 va_pic = &slice_param->RefPicList1[i];
8425 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8426 obj_surface = encode_state->reference_objects[j];
8430 obj_surface->base.id == va_pic->picture_id) {
8432 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8433 avc_state->list_ref_idx[1][i] = j;
8440 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_gpe_kernel_init:
 * Intentional no-op hook in the VME pipeline (kept for symmetry with
 * prepare/run/final); always succeeds.
 */
8444 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8445 struct encode_state *encode_state,
8446 struct intel_encoder_context *encoder_context)
8448 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_gpe_kernel_final:
 * Post-run bookkeeping after all VME kernels have been submitted.
 * When BRC is enabled, marks BRC as initialized, clears any pending
 * reset request, and clears the "MbEnc CURBE already set by BRC update"
 * flag so the next frame programs it again.  Always succeeds.
 */
8452 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8453 struct encode_state *encode_state,
8454 struct intel_encoder_context *encoder_context)
8457 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8458 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8459 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8461 /*set this flag when all kernel is finished*/
8462 if (generic_state->brc_enabled) {
8463 generic_state->brc_inited = 1;
8464 generic_state->brc_need_reset = 0;
8465 avc_state->mbenc_curbe_set_in_brc_update = 0;
8467 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_gpe_kernel_run:
 * Submits the VME kernels for one frame in the required order:
 *   1. BRC init/reset (non-FEI only, first frame or on reset request)
 *   2. Scaling 4x -> 16x -> 32x (as supported)
 *   3. HME 32x -> 16x -> 4x (coarse-to-fine, as enabled)
 *   4. SFD (static frame detection), after HME in the same batch
 *   5. BRC frame update (with optional I-frame-distortion MbEnc pass)
 *      and optional per-MB BRC update
 *   6. Weighted-prediction kernels (L0/L1) when applicable
 *   7. Final MbEnc pass
 * Always returns VA_STATUS_SUCCESS (individual kernel calls do not
 * report status here).
 */
8471 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
8472 struct encode_state *encode_state,
8473 struct intel_encoder_context *encoder_context)
8475 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8476 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8477 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8478 int fei_enabled = encoder_context->fei_enabled;
8480 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8481 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
8484 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
8485 if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
8486 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
8490 if (generic_state->hme_supported) {
8491 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8492 if (generic_state->b16xme_supported) {
8493 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8494 if (generic_state->b32xme_supported) {
8495 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
8501 /* ME runs coarsest level first so finer levels can use its MVs. */
8501 if (generic_state->hme_enabled) {
8502 if (generic_state->b16xme_enabled) {
8503 if (generic_state->b32xme_enabled) {
8504 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
8506 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8508 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8511 /*call SFD kernel after HME in same command buffer*/
8512 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
8513 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
8515 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
8518 /* BRC and MbEnc are included in the same task phase*/
8519 if (generic_state->brc_enabled) {
8520 if (avc_state->mbenc_i_frame_dist_in_use) {
8521 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
8523 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
8525 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
8526 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
8530 /* Weighted prediction: run the WP kernel per list when the slice
8530  * requests it; otherwise clear the picture-param flag (should really
8530  * be handled by the app). */
8531 avc_state->weighted_ref_l0_enable = 0;
8532 avc_state->weighted_ref_l1_enable = 0;
8533 if (avc_state->weighted_prediction_supported &&
8534 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
8535 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
8536 if (slice_param->luma_weight_l0_flag & 1) {
8537 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
8539 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
8540 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
8543 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
8544 if (slice_param->luma_weight_l1_flag & 1) {
8545 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
8546 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
8547 (slice_param->chroma_weight_l0_flag & 1) ||
8548 (slice_param->chroma_weight_l1_flag & 1))) {
8549 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
8555 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
8557 /*ignore the reset vertical line kernel*/
8559 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_pipeline:
 * Top-level VME entry point for one frame: update/check parameters,
 * allocate resources, prepare GPE bindings, run the kernels, then do
 * final bookkeeping.  Each failing step short-circuits with its status.
 */
8563 gen9_avc_vme_pipeline(VADriverContextP ctx,
8565 struct encode_state *encode_state,
8566 struct intel_encoder_context *encoder_context)
8570 /* NOTE(review): return value of gen9_avc_update_parameters is not
8570  * checked here, unlike every other step — confirm this is intended. */
8570 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8572 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8573 if (va_status != VA_STATUS_SUCCESS)
8576 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8577 if (va_status != VA_STATUS_SUCCESS)
8580 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8581 if (va_status != VA_STATUS_SUCCESS)
8584 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8585 if (va_status != VA_STATUS_SUCCESS)
8588 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8589 if (va_status != VA_STATUS_SUCCESS)
8592 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8594 return VA_STATUS_SUCCESS;
8597 /* Update PreEnc specific parameters */
/*
 * gen9_avc_preenc_update_parameters:
 * Validates the PreEnc statistics parameters, infers the frame type
 * from the number of past/future references, derives frame and 4x
 * downscaled dimensions, then binds all externally supplied buffers
 * (MV predictors, per-MB QP, MV/statistics outputs, past/future ref
 * statistics) to GPE resources after size-checking each one.
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_OPERATION_FAILED when
 * PreEnc is not active, or VA_STATUS_ERROR_INVALID_BUFFER on a bad
 * buffer (via the elided error path).
 */
8599 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8601 struct encode_state *encode_state,
8602 struct intel_encoder_context *encoder_context)
8604 struct i965_driver_data *i965 = i965_driver_data(ctx);
8605 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8606 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8607 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8608 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8609 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8610 VAStatsStatisticsParameter *stat_param = NULL;
8611 struct object_buffer *obj_buffer = NULL;
8612 struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8613 struct buffer_store *buffer_store = NULL;
8614 unsigned int size = 0, i = 0;
8615 unsigned int frame_mb_nums = 0;
8617 if (!encoder_context->preenc_enabled ||
8618 !encode_state->stat_param_ext ||
8619 !encode_state->stat_param_ext->buffer)
8620 return VA_STATUS_ERROR_OPERATION_FAILED;
8622 stat_param_h264 = avc_state->stat_param =
8623 (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8624 stat_param = &stat_param_h264->stats_params;
8626 /* Assume the frame type based on number of past/future ref frames */
8627 if (!stat_param->num_past_references && !stat_param->num_future_references)
8628 generic_state->frame_type = SLICE_TYPE_I;
8629 else if (stat_param->num_future_references > 0)
8630 generic_state->frame_type = SLICE_TYPE_B;
8632 generic_state->frame_type = SLICE_TYPE_P;
8634 generic_state->preset = INTEL_PRESET_RT_SPEED;
8635 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8637 /* frame width and height */
8638 generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8639 generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8640 generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8641 generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8643 /* 4x downscaled width and height */
8644 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8645 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8646 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
8647 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8649 /* reset hme types for preenc */
8650 if (generic_state->frame_type != SLICE_TYPE_I)
8651 generic_state->hme_enabled = 1;
8653 /* ensure frame width is not too small */
8654 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8655 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8656 generic_state->downscaled_width_4x_in_mb =
8657 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8660 /* ensure frame height is not too small*/
8661 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8662 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8663 generic_state->downscaled_height_4x_in_mb =
8664 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8667 /********** Ensure buffer object parameters ********/
8668 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8670 /* mv predictor buffer */
8671 if (stat_param_h264->mv_predictor_ctrl) {
8672 if (stat_param->mv_predictor == VA_INVALID_ID)
8674 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8675 obj_buffer = BUFFER(stat_param->mv_predictor);
8676 buffer_store = obj_buffer->buffer_store;
8677 if (buffer_store->bo->size < size)
8679 if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8680 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8681 i965_dri_object_to_buffer_gpe_resource(
8682 &avc_ctx->preproc_mv_predictor_buffer,
8687 /* per-MB QP input buffer */
8687 if (stat_param_h264->mb_qp) {
8688 if (stat_param->qp == VA_INVALID_ID)
8690 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8691 obj_buffer = BUFFER(stat_param->qp);
8692 buffer_store = obj_buffer->buffer_store;
8693 if (buffer_store->bo->size < size)
8695 if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8696 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8697 i965_dri_object_to_buffer_gpe_resource(
8698 &avc_ctx->preproc_mb_qp_buffer,
8702 /* locate mv and stat buffer among the (up to 2) output buffers */
8703 if (!stat_param_h264->disable_mv_output ||
8704 !stat_param_h264->disable_statistics_output) {
8706 if (!stat_param->outputs)
8709 for (i = 0; i < 2 ; i++) {
8710 if (stat_param->outputs[i] != VA_INVALID_ID) {
8711 obj_buffer = BUFFER(stat_param->outputs[i]);
8712 switch (obj_buffer->type) {
8713 case VAStatsMVBufferType:
8714 obj_buffer_mv = obj_buffer;
8716 case VAStatsStatisticsBufferType:
8717 obj_buffer_stat = obj_buffer;
8723 if (!(!stat_param_h264->disable_mv_output &&
8724 !stat_param_h264->disable_statistics_output))
8728 /* mv data output buffer */
8729 if (!stat_param_h264->disable_mv_output) {
8730 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8731 buffer_store = obj_buffer_mv->buffer_store;
8732 if (buffer_store->bo->size < size)
8734 if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8735 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8736 i965_dri_object_to_buffer_gpe_resource(
8737 &avc_ctx->preproc_mv_data_out_buffer,
8740 /* statistics output buffer */
8741 if (!stat_param_h264->disable_statistics_output) {
8742 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8743 buffer_store = obj_buffer_stat->buffer_store;
8744 if (buffer_store->bo->size < size)
8746 if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8747 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8748 i965_dri_object_to_buffer_gpe_resource(
8749 &avc_ctx->preproc_stat_data_out_buffer,
8753 /* past ref stat out buffer */
8754 if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8755 stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8756 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8757 obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8758 buffer_store = obj_buffer->buffer_store;
8759 if (buffer_store->bo->size < size)
8761 if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8762 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8763 i965_dri_object_to_buffer_gpe_resource(
8764 &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8767 /* future ref stat out buffer */
8768 /* NOTE(review): this guard tests num_past_references, but the block
8768  * binds the FUTURE-ref stat buffer — presumably it should test
8768  * num_future_references (copy-paste from the past-ref block above).
8768  * Confirm against the PreEnc spec before changing. */
8768 if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8769 stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8770 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8771 obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8772 buffer_store = obj_buffer->buffer_store;
8773 if (buffer_store->bo->size < size)
8775 if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8776 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8777 i965_dri_object_to_buffer_gpe_resource(
8778 &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8781 return VA_STATUS_SUCCESS;
8784 return VA_STATUS_ERROR_INVALID_BUFFER;
8787 /* allocate internal resources required for PreEnc */
/*
 * gen9_avc_preenc_allocate_internal_resources:
 * Allocates (or re-allocates) the driver-internal buffers PreEnc needs:
 * 4x ME MV-data and distortion surfaces, the BRC distortion surface,
 * the MB-BRC constant-data (FTQ LUT) buffer, the 4x downscaled
 * current/past/future surfaces, and a dummy coded buffer used as the
 * status buffer.  Jumps to failed_allocation (elided) on any failure
 * and returns VA_STATUS_ERROR_ALLOCATION_FAILED from there.
 */
8789 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8790 struct encode_state *encode_state,
8791 struct intel_encoder_context *encoder_context)
8793 struct i965_driver_data *i965 = i965_driver_data(ctx);
8794 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8795 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8796 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8797 unsigned int width = 0;
8798 unsigned int height = 0;
8799 unsigned int size = 0;
8800 int allocate_flag = 1;
8802 /* 4x MEMV data buffer */
8803 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8804 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8805 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8806 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8807 &avc_ctx->s4x_memv_data_buffer,
8810 "4x MEMV data buffer");
8812 goto failed_allocation;
8813 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8815 /* Output DISTORTION surface from 4x ME */
8816 width = generic_state->downscaled_width_4x_in_mb * 8;
8817 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8818 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8819 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8820 &avc_ctx->s4x_memv_distortion_buffer,
8823 "4x MEMV distortion buffer");
8825 goto failed_allocation;
8826 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8828 /* output BRC DISTORTION surface from 4x ME */
8829 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8830 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8831 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8832 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8833 &avc_ctx->res_brc_dist_data_surface,
8836 "brc dist data buffer");
8838 goto failed_allocation;
8839 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8842 /* FTQ LUT buffer, which is the mbbrc_const_data_buffer */
8843 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8844 size = 16 * AVC_QP_MAX * 4;
8845 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8846 &avc_ctx->res_mbbrc_const_data_buffer,
8847 ALIGN(size, 0x1000),
8848 "mbbrc const data buffer");
8850 goto failed_allocation;
8851 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8853 /* 4x downscaled surface (created once, reused across frames) */
8854 if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8855 i965_CreateSurfaces(ctx,
8856 generic_state->frame_width_4x,
8857 generic_state->frame_height_4x,
8858 VA_RT_FORMAT_YUV420,
8860 &avc_ctx->preenc_scaled_4x_surface_id);
8861 avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8862 if (!avc_ctx->preenc_scaled_4x_surface_obj)
8863 goto failed_allocation;
8864 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8865 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8868 /* 4x downscaled past ref surface */
8869 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8870 i965_CreateSurfaces(ctx,
8871 generic_state->frame_width_4x,
8872 generic_state->frame_height_4x,
8873 VA_RT_FORMAT_YUV420,
8875 &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8876 avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8877 SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8878 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8879 goto failed_allocation;
8880 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8881 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8884 /* 4x downscaled future ref surface */
8885 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8886 i965_CreateSurfaces(ctx,
8887 generic_state->frame_width_4x,
8888 generic_state->frame_height_4x,
8889 VA_RT_FORMAT_YUV420,
8891 &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8892 avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8893 SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8894 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8895 goto failed_allocation;
8896 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8897 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8900 /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8901 * the generic AVC Encode codepath which allocates the status buffer as extension
8903 if (!avc_ctx->status_buffer.bo) {
8905 generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8906 size += I965_CODEDBUFFER_HEADER_SIZE;
8908 avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8909 "Dummy Coded Buffer",
8913 return VA_STATUS_SUCCESS;
8916 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * gen9_avc_preenc_gpe_kernel_run:
 * Submits the PreEnc kernels for one frame: 4x scaling of the current
 * picture and (when present) the past/future reference pictures, then
 * 4x HME when enabled, then the preproc kernel when either MV output
 * or statistics output is requested.  Always returns VA_STATUS_SUCCESS.
 */
8921 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8922 struct encode_state *encode_state,
8923 struct intel_encoder_context *encoder_context)
8925 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8926 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8927 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8928 /* NOTE(review): stray double semicolon below — harmless, tidy up. */
8928 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8929 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8931 /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8932 * to avoid repeated scaling of same surfaces */
8935 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8936 INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8937 if (stat_param->num_past_references > 0) {
8938 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8939 INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8941 if (stat_param->num_future_references > 0) {
8942 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8943 INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8947 if (generic_state->hme_enabled) {
8948 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8951 /* preproc kernel */
8952 if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8953 gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8956 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_preenc_pipeline:
 * Top-level PreEnc entry point for one frame: update/validate the
 * statistics parameters, allocate internal resources, then run the
 * PreEnc kernels.  Each failing step short-circuits with its status.
 */
8960 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8962 struct encode_state *encode_state,
8963 struct intel_encoder_context *encoder_context)
8967 va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8968 if (va_status != VA_STATUS_SUCCESS)
8971 va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8972 if (va_status != VA_STATUS_SUCCESS)
8975 va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8976 if (va_status != VA_STATUS_SUCCESS)
8979 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_context_destroy:
 * Tears down the VME context: destroys the GPE kernel contexts via
 * gen9_avc_kernel_destroy, then frees the owned state structures
 * (only the generic_state free is visible here; the remaining frees
 * are in elided lines).
 */
8983 gen9_avc_vme_context_destroy(void * context)
8985 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8986 struct generic_encoder_context *generic_ctx;
8987 struct i965_avc_encoder_context *avc_ctx;
8988 struct generic_enc_codec_state *generic_state;
8989 struct avc_enc_state *avc_state;
8994 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8995 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8996 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8997 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8999 gen9_avc_kernel_destroy(vme_context);
9003 free(generic_state);
/*
 * gen8_avc_kernel_init:
 * Gen8 variant of the kernel setup: loads the scaling/BRC/ME/MbEnc/SFD
 * kernel contexts and installs the CURBE and surface-setup callbacks.
 * Uses gen8-specific CURBE setters for scaling4x, ME, MbEnc and BRC
 * frame update; the remaining callbacks are shared with Gen9.
 */
9011 gen8_avc_kernel_init(VADriverContextP ctx,
9012 struct intel_encoder_context *encoder_context)
9014 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9015 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9016 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9017 int fei_enabled = encoder_context->fei_enabled;
9019 generic_ctx->get_kernel_header_and_size = fei_enabled ?
9020 intel_avc_fei_get_kernel_header_and_size :
9021 intel_avc_get_kernel_header_and_size ;
9022 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9023 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9024 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9025 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9026 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9029 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9030 generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9031 generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9032 generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9033 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9034 generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9035 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9037 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9038 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9039 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9040 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9041 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9042 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
/*
 * gen9_avc_kernel_init:
 * Gen9+ kernel setup with three mutually exclusive modes:
 *  - generic AVC encode (neither FEI nor PreEnc): full kernel set with
 *    a per-platform choice of the scaling4x CURBE setter;
 *  - FEI encode: MbEnc only, with FEI-specific CURBE/surface callbacks;
 *  - PreEnc: scaling + ME + preproc, with PreEnc-specific callbacks.
 */
9045 gen9_avc_kernel_init(VADriverContextP ctx,
9046 struct intel_encoder_context *encoder_context)
9048 struct i965_driver_data *i965 = i965_driver_data(ctx);
9049 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9050 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9051 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9052 int fei_enabled = encoder_context->fei_enabled;
9053 int preenc_enabled = encoder_context->preenc_enabled;
9055 generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
9056 intel_avc_fei_get_kernel_header_and_size :
9057 intel_avc_get_kernel_header_and_size ;
9059 if (!fei_enabled && !preenc_enabled) {
9060 /* generic AVC Encoder */
9061 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9062 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9063 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9064 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9065 encoder_context->fei_enabled);
9066 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
9067 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9070 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9071 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9072 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
9073 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
9074 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9075 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
9076 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
9077 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9078 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
9080 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9081 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9082 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9083 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9084 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9085 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
9086 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9087 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
9089 /* Per-platform scaling4x CURBE override: SKL/BXT use the gen9
9089  * variant, KBL/GLK/Gen10 the gen9.5 variant. */
9089 if (IS_SKL(i965->intel.device_info) ||
9090 IS_BXT(i965->intel.device_info))
9091 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9092 else if (IS_KBL(i965->intel.device_info) ||
9093 IS_GEN10(i965->intel.device_info) ||
9094 IS_GLK(i965->intel.device_info))
9095 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9097 } else if (fei_enabled) {
9098 /* FEI AVC Encoding */
9099 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9100 encoder_context->fei_enabled);
9101 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
9102 generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
9105 /* PreEnc for AVC */
9106 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
9107 encoder_context->preenc_enabled);
9108 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
9109 encoder_context->preenc_enabled);
9110 gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
9112 /* preenc 4x scaling uses the gen95 kernel */
9113 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9114 generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
9115 generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
9117 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9118 generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
9119 generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
9124 PAK pipeline related function
9127 intel_avc_enc_slice_type_fixup(int slice_type);
9129 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
/* NOTE(review): this numbered listing is missing interior lines (return type,
 * braces, the "if (!allocate_flag)" guards preceding each goto, the size
 * assignment for the statistics buffer, and the failed_allocation label).
 * Comments below describe only the statements visible here.
 * Visible behavior: (re)allocates the second-level batch buffer used for the
 * image-state write, and — when BRC buffers are not yet allocated — the BRC
 * pre-PAK statistics output buffer. Returns VA_STATUS_SUCCESS on success,
 * VA_STATUS_ERROR_ALLOCATION_FAILED on any allocation failure. */
9131 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9132 struct encode_state *encode_state,
9133 struct intel_encoder_context *encoder_context)
9135 struct i965_driver_data *i965 = i965_driver_data(ctx);
9136 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9137 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9138 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9139 unsigned int size = 0;
9140 int allocate_flag = 1;
9142 /*second level batch buffer for image state write when cqp etc*/
/* Free any previous buffer first so re-allocation on reconfigure cannot leak. */
9143 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9144 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9145 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9146 &avc_ctx->res_image_state_batch_buffer_2nd_level,
9147 ALIGN(size, 0x1000),
/* NOTE(review): "levle" typo below is inside a runtime debug-name string;
 * deliberately left untouched here since changing it alters program output. */
9148 "second levle batch (image state write) buffer");
9150 goto failed_allocation;
/* Allocate the BRC pre-PAK statistics buffer only once per context. */
9152 if (!generic_state->brc_allocated) {
9153 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9155 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9156 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9157 ALIGN(size, 0x1000),
9158 "brc pak statistic buffer");
9160 goto failed_allocation;
9163 return VA_STATUS_SUCCESS;
9166 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Emit the MFX_PIPE_MODE_SELECT command (5 DWs) selecting AVC encode mode on
 * the BCS ring. Stream-out is enabled on every PAK pass except the last so
 * multi-pass BRC can read back statistics; pre/post deblocking outputs are
 * enabled based on which destination buffer object is present.
 * NOTE(review): interior listing lines (return type, braces, one DW of the
 * third OUT_BCS_BATCH expression) are missing from this numbered excerpt. */
9170 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
9171 struct encode_state *encode_state,
9172 struct intel_encoder_context *encoder_context)
9174 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9175 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9176 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9177 struct intel_batchbuffer *batch = encoder_context->base.batch;
9179 BEGIN_BCS_BATCH(batch, 5);
9181 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
9182 OUT_BCS_BATCH(batch,
9184 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
9185 (MFD_MODE_VLD << 15) |
9186 (0 << 13) | /* Non-VDEnc mode is 0*/
9187 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
9188 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
9189 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
9190 (0 << 7) | /* Scaled surface enable */
9191 (0 << 6) | /* Frame statistics stream out enable */
9192 (0 << 5) | /* not in stitch mode */
9193 (1 << 4) | /* encoding mode */
9194 (MFX_FORMAT_AVC << 0));
9195 OUT_BCS_BATCH(batch,
9196 (0 << 7) | /* expand NOA bus flag */
9197 (0 << 6) | /* disable slice-level clock gating */
9198 (0 << 5) | /* disable clock gating for NOA */
9199 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
9200 (0 << 3) | /* terminate if AVC mbdata error occurs */
9201 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
9204 OUT_BCS_BATCH(batch, 0)
9205 OUT_BCS_BATCH(batch, 0);
9207 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 DWs) describing one NV12-style surface for the
 * MFX engine: planar 4:2:0 8-bit, interleaved U/V, Y-major tiled, with the
 * chroma (Cb) plane located y_cb_offset rows below the luma plane.
 * 'id' selects which MFX surface slot this state applies to.
 * NOTE(review): the listing drops the return-type line, the final parameter
 * line declaring 'id', and the braces. */
9211 gen9_mfc_avc_surface_state(VADriverContextP ctx,
9212 struct intel_encoder_context *encoder_context,
9213 struct i965_gpe_resource *gpe_resource,
9216 struct intel_batchbuffer *batch = encoder_context->base.batch;
9218 BEGIN_BCS_BATCH(batch, 6);
9220 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
9221 OUT_BCS_BATCH(batch, id);
9222 OUT_BCS_BATCH(batch,
9223 ((gpe_resource->height - 1) << 18) |
9224 ((gpe_resource->width - 1) << 4));
9225 OUT_BCS_BATCH(batch,
9226 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
9227 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
9228 ((gpe_resource->pitch - 1) << 3) | /* pitch */
9229 (0 << 2) | /* must be 0 for interleave U/V */
9230 (1 << 1) | /* must be tiled */
9231 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
9232 OUT_BCS_BATCH(batch,
9233 (0 << 16) | /* must be 0 for interleave U/V */
9234 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
/* The second-chroma DW repeats y_cb_offset; for interleaved U/V both
 * chroma offsets are the same plane. */
9235 OUT_BCS_BATCH(batch,
9236 (0 << 16) | /* must be 0 for interleave U/V */
9237 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9239 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE: all frame-level buffer addresses the MFX
 * engine needs (deblocking outputs, source surface, PAK MB status, row-store
 * scratch buffers, up to 16 reference pictures). The command is 65 DWs; on
 * GEN10 the missing listing line presumably extends cmd_len (three extra
 * zero DWs are appended at the end for that platform — confirm against the
 * full source). NOTE(review): return type, braces, the 'i' declaration and
 * the GEN10 cmd_len assignment are absent from this numbered excerpt. */
9243 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9245 struct i965_driver_data *i965 = i965_driver_data(ctx);
9246 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9247 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9248 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9249 struct intel_batchbuffer *batch = encoder_context->base.batch;
9251 unsigned int cmd_len = 65;
9253 if (IS_GEN10(i965->intel.device_info))
9256 BEGIN_BCS_BATCH(batch, cmd_len);
9258 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));
9260 /* the DW1-3 is for pre_deblocking */
9261 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9263 /* the DW4-6 is for the post_deblocking */
9264 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9266 /* the DW7-9 is for the uncompressed_picture */
9267 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
9269 /* the DW10-12 is for PAK information (write) */
9270 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
9272 /* the DW13-15 is for the intra_row_store_scratch */
9273 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9275 /* the DW16-18 is for the deblocking filter */
9276 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9278 /* the DW 19-50 is for Reference pictures*/
9279 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
9280 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
9283 /* DW 51, reference picture attributes */
9284 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9286 /* The DW 52-54 is for PAK information (read) */
9287 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
9289 /* the DW 55-57 is the ILDB buffer */
9290 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9292 /* the DW 58-60 is the second ILDB buffer */
9293 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9295 /* DW 61, memory compress enable & mode */
9296 OUT_BCS_BATCH(batch, 0);
9298 /* the DW 62-64 is the buffer */
9299 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
/* GEN10 pads the command with three additional zero DWs. */
9302 if (IS_GEN10(i965->intel.device_info)) {
9303 OUT_BCS_BATCH(batch, 0);
9304 OUT_BCS_BATCH(batch, 0);
9305 OUT_BCS_BATCH(batch, 0);
9308 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): indirect-object base addresses.
 * For encode, only two regions are programmed — the per-surface MV data
 * buffer (sized 32*4 bytes per macroblock, per the visible computation) and
 * the compressed-bitstream output (PAK-BSE); bitstream-in, IT-COEFF and DBLK
 * regions are left NULL. NOTE(review): return type, braces, the early-return
 * body for a missing reconstructed surface, and the target/delta argument
 * lines of the split OUT_BUFFER_* calls are missing from this excerpt. */
9312 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
9313 struct encode_state *encode_state,
9314 struct intel_encoder_context *encoder_context)
9316 struct i965_driver_data *i965 = i965_driver_data(ctx);
9317 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9318 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9319 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9320 struct intel_batchbuffer *batch = encoder_context->base.batch;
9321 struct object_surface *obj_surface;
9322 struct gen9_surface_avc *avc_priv_surface;
9323 unsigned int size = 0;
9324 unsigned int w_mb = generic_state->frame_width_in_mbs;
9325 unsigned int h_mb = generic_state->frame_height_in_mbs;
9327 obj_surface = encode_state->reconstructed_object;
/* Bail out if the reconstructed surface or its AVC private data is absent. */
9329 if (!obj_surface || !obj_surface->private_data)
9331 avc_priv_surface = obj_surface->private_data;
9333 BEGIN_BCS_BATCH(batch, 26);
9335 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
9336 /* The DW1-5 is for the MFX indirect bistream offset */
9337 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9338 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9340 /* the DW6-10 is for MFX Indirect MV Object Base Address */
/* 32 MVs * 4 bytes per macroblock. */
9341 size = w_mb * h_mb * 32 * 4;
9342 OUT_BUFFER_3DW(batch,
9343 avc_priv_surface->res_mv_data_surface.bo,
9346 i965->intel.mocs_state);
9347 OUT_BUFFER_2DW(batch,
9348 avc_priv_surface->res_mv_data_surface.bo,
9350 ALIGN(size, 0x1000));
9352 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
9353 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9354 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9356 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
9357 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9358 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9360 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
9361 * Note: an offset is specified in MFX_AVC_SLICE_STATE
9363 OUT_BUFFER_3DW(batch,
9364 generic_ctx->compressed_bitstream.res.bo,
9367 i965->intel.mocs_state);
9368 OUT_BUFFER_2DW(batch,
9369 generic_ctx->compressed_bitstream.res.bo,
9371 generic_ctx->compressed_bitstream.end_offset);
9373 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs). Only the BSD/MPC row-store
 * scratch buffer is programmed; the MPR row-store and bitplane-read buffers
 * are decoder-only and left NULL for encode.
 * NOTE(review): return type and braces are missing from this excerpt. */
9377 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9379 struct i965_driver_data *i965 = i965_driver_data(ctx);
9380 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9381 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9382 struct intel_batchbuffer *batch = encoder_context->base.batch;
9384 BEGIN_BCS_BATCH(batch, 10);
9386 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9388 /* The DW1-3 is for bsd/mpc row store scratch buffer */
9389 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9391 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9392 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9394 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9395 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9397 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): direct-mode MV buffer addresses
 * for the reference frames and the current frame, followed by the top-field
 * POC table used for temporal direct prediction.
 * NOTE(review): this excerpt is missing the return type, braces, the 'i'
 * declaration, the relocation-offset argument lines of the OUT_BCS_RELOC64
 * calls, and the else-branch/loop-close lines — comments describe only the
 * visible statements. */
9401 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
9402 struct intel_encoder_context *encoder_context)
9404 struct i965_driver_data *i965 = i965_driver_data(ctx);
9405 struct intel_batchbuffer *batch = encoder_context->base.batch;
9406 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9407 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9408 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9412 BEGIN_BCS_BATCH(batch, 71);
9414 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
9416 /* Reference frames and Current frames */
9417 /* the DW1-32 is for the direct MV for reference */
/* Two DWs (one 64-bit relocation) per reference slot; absent slots get
 * a pair of zero DWs instead of a relocation. */
9418 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
9419 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
9420 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
9421 I915_GEM_DOMAIN_INSTRUCTION, 0,
9424 OUT_BCS_BATCH(batch, 0);
9425 OUT_BCS_BATCH(batch, 0);
9429 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9431 /* the DW34-36 is the MV for the current frame */
/* Current-frame MV buffer is written by HW, hence the write domain too. */
9432 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
9433 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
9436 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: 32 reference POC entries, then the two current-frame entries. */
9439 for (i = 0; i < 32; i++) {
9440 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
9442 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
9443 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
9445 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command (18 DWs) carrying a quantizer-matrix table.
 * 'qm' supplies qm_length DWs (max 16); the remainder of the 16-DW payload
 * is zero-filled. NOTE(review): the return type, the qm_type/qm_length
 * parameter lines and the braces are missing from this numbered excerpt. */
9449 gen9_mfc_qm_state(VADriverContextP ctx,
9451 const unsigned int *qm,
9453 struct intel_encoder_context *encoder_context)
9455 struct intel_batchbuffer *batch = encoder_context->base.batch;
9456 unsigned int qm_buffer[16];
9458 assert(qm_length <= 16);
9459 assert(sizeof(*qm) == 4);
/* Zero-pad so unused table entries are deterministic. */
9460 memset(qm_buffer, 0, 16 * 4);
9461 memcpy(qm_buffer, qm, qm_length * 4);
9463 BEGIN_BCS_BATCH(batch, 18);
9464 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9465 OUT_BCS_BATCH(batch, qm_type << 0);
9466 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9467 ADVANCE_BCS_BATCH(batch);
/* Program the four AVC quantizer matrices (4x4 intra/inter, 8x8 intra/inter).
 * Uses flat matrices when neither SPS nor PPS declares scaling lists;
 * otherwise reads the application-supplied VAIQMatrixBufferH264 (4x4 inter is
 * taken from ScalingList4x4[3], i.e. the first chroma-inter list — presumably
 * intentional per the HW layout; confirm against the full source).
 * NOTE(review): the return type, braces, the else-keyword line and the
 * 'qm_flat' reference declaration are missing from this excerpt. */
9471 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9472 struct encode_state *encode_state,
9473 struct intel_encoder_context *encoder_context)
9475 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9476 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9477 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9478 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9481 const unsigned int *qm_4x4_intra;
9482 const unsigned int *qm_4x4_inter;
9483 const unsigned int *qm_8x8_intra;
9484 const unsigned int *qm_8x8_inter;
9486 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9487 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9488 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9490 VAIQMatrixBufferH264 *qm;
9491 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9492 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9493 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9494 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9495 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9496 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 tables are 12 DWs (48 bytes), 8x8 tables 16 DWs (64 bytes). */
9499 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9500 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9501 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9502 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emit one MFX_FQM_STATE command (34 DWs) carrying a forward-quantizer
 * matrix table. 'fqm' supplies fqm_length DWs (max 32); the rest of the
 * 32-DW payload is zero-filled. NOTE(review): the return type, the
 * fqm_type/fqm_length parameter lines and the braces are missing. */
9506 gen9_mfc_fqm_state(VADriverContextP ctx,
9508 const unsigned int *fqm,
9510 struct intel_encoder_context *encoder_context)
9512 struct intel_batchbuffer *batch = encoder_context->base.batch;
9513 unsigned int fqm_buffer[32];
9515 assert(fqm_length <= 32);
9516 assert(sizeof(*fqm) == 4);
/* Zero-pad so unused table entries are deterministic. */
9517 memset(fqm_buffer, 0, 32 * 4);
9518 memcpy(fqm_buffer, fqm, fqm_length * 4);
9520 BEGIN_BCS_BATCH(batch, 34);
9521 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9522 OUT_BCS_BATCH(batch, fqm_type << 0);
9523 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9524 ADVANCE_BCS_BATCH(batch);
/* Build a len x len forward-quantizer table from a quantizer table:
 * fqm[i][j] = 65536 / qm[j][i] — note the transpose (row/column swap) and
 * the assert that no qm entry is zero (division guard).
 * NOTE(review): return type, braces and the i/j declarations are missing
 * from this numbered excerpt. */
9528 gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
9531     for (i = 0; i < len; i++)
9532         for (j = 0; j < len; j++) {
9533             assert(qm[j * len + i]);
9534             fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/* Program the four AVC forward-quantizer matrices. Flat tables are used when
 * no scaling lists are declared in SPS/PPS; otherwise each FQM is derived
 * from the application's scaling lists via gen9_mfc_fill_fqm (three 4x4
 * lists packed per 24-DW intra/inter table, one 8x8 list per 32-DW table).
 * NOTE(review): return type, braces, the else-keyword line, the 'fqm' local
 * buffer declaration and the 'i' declaration are missing from this excerpt. */
9539 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
9540 struct encode_state *encode_state,
9541 struct intel_encoder_context *encoder_context)
9543 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9544 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9545 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9546 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9548 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9549 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9550 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
9551 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
9552 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
9553 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
9557 VAIQMatrixBufferH264 *qm;
9558 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9559 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Lists 0-2: intra Y/Cb/Cr; each 4x4 list occupies 16 uint16 entries. */
9561 for (i = 0; i < 3; i++)
9562 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
9563 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* Lists 3-5: inter Y/Cb/Cr. */
9565 for (i = 3; i < 6; i++)
9566 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
9567 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
9569 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
9570 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
9572 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
9573 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emit MFX_INSERT_OBJECT: inserts raw header/bitstream bytes into the coded
 * stream. data_bits_in_last_dw==0 is normalized to 32 (a full final DW).
 * Flags in DW1: slice-header indicator, valid bits in last DW, bytes to skip
 * before emulation-prevention processing, emulation-byte insertion enable,
 * last-header and end-of-slice markers.
 * NOTE(review): "lenght_in_dws" is a long-standing spelling of "length" in
 * this interface — kept, since renaming would break callers. Return type
 * and braces are missing from this numbered excerpt. */
9578 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9579 struct intel_encoder_context *encoder_context,
9580 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9581 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9582 int slice_header_indicator,
9583 struct intel_batchbuffer *batch)
9585 if (data_bits_in_last_dw == 0)
9586 data_bits_in_last_dw = 32;
9588 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9590 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9591 OUT_BCS_BATCH(batch,
9592 (0 << 16) | /* always start at offset 0 */
9593 (slice_header_indicator << 14) |
9594 (data_bits_in_last_dw << 8) |
9595 (skip_emul_byte_count << 4) |
9596 (!!emulation_flag << 3) |
9597 ((!!is_last_header) << 2) |
9598 ((!!is_end_of_slice) << 1) |
9599 (0 << 0)); /* check this flag */
9600 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9602 ADVANCE_BCS_BATCH(batch);
/* Scan the packed raw-data headers attached to slice 0 and insert only the
 * Access Unit Delimiter NAL (AVC_NAL_DELIMITER), so the AUD is emitted before
 * any other header in the access unit. Non-raw-data entries are skipped.
 * NOTE(review): return type, braces, continue-statements and several
 * argument lines of the gen9_mfc_avc_insert_object call are missing from
 * this numbered excerpt; comments describe only the visible code. */
9606 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
9607 struct encode_state *encode_state,
9608 struct intel_encoder_context *encoder_context,
9609 struct intel_batchbuffer *batch)
9611 VAEncPackedHeaderParameterBuffer *param = NULL;
9612 unsigned int length_in_bits;
9613 unsigned int *header_data = NULL;
9614 unsigned char *nal_type = NULL;
9615 int count, i, start_index;
9617 count = encode_state->slice_rawdata_count[0];
9618 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
9620 for (i = 0; i < count; i++) {
9621 unsigned int skip_emul_byte_cnt;
9623 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9624 nal_type = (unsigned char *)header_data;
9626 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
/* Only raw-data packed headers can carry an AUD. */
9627 if (param->type != VAEncPackedHeaderRawData)
9630 length_in_bits = param->bit_length;
9632 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* NAL unit type is the low 5 bits of the byte after the start code. */
9634 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
9635 gen9_mfc_avc_insert_object(ctx,
9638 ALIGN(length_in_bits, 32) >> 5,
9639 length_in_bits & 0x1f,
9643 !param->has_emulation_bytes,
/* Insert all packed data associated with one slice, in order: first every
 * raw-data header except the AUD (already emitted) and the slice header,
 * then the slice header last — either the application-supplied packed slice
 * header (slice_header_index >= 0) or one generated by the driver via
 * build_avc_slice_header when none was provided.
 * NOTE(review): return type, braces, the slice_index parameter line,
 * continue-statements and many argument lines of the insert_object /
 * build_avc_slice_header calls are missing from this numbered excerpt. */
9652 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
9653 struct encode_state *encode_state,
9654 struct intel_encoder_context *encoder_context,
9656 struct intel_batchbuffer *batch)
9658 VAEncPackedHeaderParameterBuffer *param = NULL;
9659 unsigned int length_in_bits;
9660 unsigned int *header_data = NULL;
9661 int count, i, start_index;
9662 int slice_header_index;
9663 unsigned char *nal_type = NULL;
/* Index 0 means "no packed slice header supplied"; -1 marks that case. */
9665 if (encode_state->slice_header_index[slice_index] == 0)
9666 slice_header_index = -1;
9668 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9670 count = encode_state->slice_rawdata_count[slice_index];
9671 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9673 for (i = 0; i < count; i++) {
9674 unsigned int skip_emul_byte_cnt;
9676 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9677 nal_type = (unsigned char *)header_data;
9679 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
9681 length_in_bits = param->bit_length;
9683 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9685 /* skip the slice header packed data type as it is lastly inserted */
9686 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
9689 /* as the slice header is still required, the last header flag is set to
9692 gen9_mfc_avc_insert_object(ctx,
9695 ALIGN(length_in_bits, 32) >> 5,
9696 length_in_bits & 0x1f,
9700 !param->has_emulation_bytes,
9705 if (slice_header_index == -1) {
9706 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
9707 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
9708 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
9709 unsigned char *slice_header = NULL;
9710 int slice_header_length_in_bits = 0;
9712 /* No slice header data is passed. And the driver needs to generate it */
9713 /* For the Normal H264 */
9714 slice_header_length_in_bits = build_avc_slice_header(seq_param,
9718 gen9_mfc_avc_insert_object(ctx,
9720 (unsigned int *)slice_header,
9721 ALIGN(slice_header_length_in_bits, 32) >> 5,
9722 slice_header_length_in_bits & 0x1f,
9723 5, /* first 5 bytes are start code + nal unit type */
/* NOTE(review): build_avc_slice_header allocates slice_header; the free()
 * presumably follows in lines absent from this excerpt — confirm. */
9730 unsigned int skip_emul_byte_cnt;
9732 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
9734 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
9735 length_in_bits = param->bit_length;
9737 /* as the slice header is the last header data for one slice,
9738 * the last header flag is set to one.
9740 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9742 gen9_mfc_avc_insert_object(ctx,
9745 ALIGN(length_in_bits, 32) >> 5,
9746 length_in_bits & 0x1f,
9750 !param->has_emulation_bytes,
/* Insert all non-slice headers for a slice. For slice 0 this emits, in
 * order: the AUD (if packed), packed SPS, packed PPS, packed SEI, and — per
 * the visible trailing branch — presumably driver-generated SEI for CBR rate
 * control (body lines absent; confirm). Finally delegates per-slice packed
 * data plus the slice header to gen9_mfc_avc_insert_slice_packed_data.
 * NOTE(review): the function name "inset" is a long-standing typo for
 * "insert"; kept, as renaming would break callers. The slice_index
 * parameter line, braces, and many insert_object argument lines are
 * missing from this numbered excerpt. */
9759 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9760 struct encode_state *encode_state,
9761 struct intel_encoder_context *encoder_context,
9762 VAEncSliceParameterBufferH264 *slice_param,
9764 struct intel_batchbuffer *batch)
9766 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9767 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9768 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9769 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9770 unsigned int skip_emul_byte_cnt;
/* Sequence-level headers are emitted only ahead of the first slice. */
9772 if (slice_index == 0) {
9774 /* if AUD exist and insert it firstly */
9775 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9777 if (encode_state->packed_header_data[idx]) {
9778 VAEncPackedHeaderParameterBuffer *param = NULL;
9779 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9780 unsigned int length_in_bits;
9782 assert(encode_state->packed_header_param[idx]);
9783 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9784 length_in_bits = param->bit_length;
9786 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9787 gen9_mfc_avc_insert_object(ctx,
9790 ALIGN(length_in_bits, 32) >> 5,
9791 length_in_bits & 0x1f,
9795 !param->has_emulation_bytes,
/* Same pattern for the packed PPS. */
9800 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9802 if (encode_state->packed_header_data[idx]) {
9803 VAEncPackedHeaderParameterBuffer *param = NULL;
9804 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9805 unsigned int length_in_bits;
9807 assert(encode_state->packed_header_param[idx]);
9808 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9809 length_in_bits = param->bit_length;
9811 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9813 gen9_mfc_avc_insert_object(ctx,
9816 ALIGN(length_in_bits, 32) >> 5,
9817 length_in_bits & 0x1f,
9821 !param->has_emulation_bytes,
/* Same pattern for packed SEI. */
9826 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9828 if (encode_state->packed_header_data[idx]) {
9829 VAEncPackedHeaderParameterBuffer *param = NULL;
9830 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9831 unsigned int length_in_bits;
9833 assert(encode_state->packed_header_param[idx]);
9834 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9835 length_in_bits = param->bit_length;
9837 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9838 gen9_mfc_avc_insert_object(ctx,
9841 ALIGN(length_in_bits, 32) >> 5,
9842 length_in_bits & 0x1f,
9846 !param->has_emulation_bytes,
9849 } else if (internal_rate_mode == VA_RC_CBR) {
9854 gen9_mfc_avc_insert_slice_packed_data(ctx,
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, weighted-prediction config, slice geometry (current and
 * next slice MB positions), rate-control flags, the bitstream start offset,
 * QP clamps and rounding controls.
 * NOTE(review): many interior lines of this numbered excerpt are missing —
 * the return type, braces, the 'i' declaration, the max_qp_n/max_qp_p and
 * grow/shrink/correct computations, the RC-triggered second-pass QP line in
 * DW3, and parts of DW8/DW9. Comments therefore describe only the visible
 * statements; quantities such as max_qp_n/max_qp_p and correct[] are used
 * here but computed in the absent lines — confirm against the full source. */
9862 gen9_mfc_avc_slice_state(VADriverContextP ctx,
9863 struct encode_state *encode_state,
9864 struct intel_encoder_context *encoder_context,
9865 VAEncPictureParameterBufferH264 *pic_param,
9866 VAEncSliceParameterBufferH264 *slice_param,
9867 VAEncSliceParameterBufferH264 *next_slice_param,
9868 struct intel_batchbuffer *batch)
9870 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9871 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9872 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9873 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9874 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
9875 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
9876 unsigned char correct[6], grow, shrink;
9877 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
9878 int max_qp_n, max_qp_p;
9880 int weighted_pred_idc = 0;
9881 int num_ref_l0 = 0, num_ref_l1 = 0;
9882 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
9883 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
9884 unsigned int rc_panic_enable = 0;
9885 unsigned int rate_control_counter_enable = 0;
9886 unsigned int rounding_value = 0;
9887 unsigned int rounding_inter_enable = 0;
/* Convert the slice's first MB address into (x, y) MB coordinates. */
9889 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9890 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
9892 if (next_slice_param) {
9893 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9894 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice: "next" position is one row past the bottom of the frame. */
9896 next_slice_hor_pos = 0;
9897 next_slice_ver_pos = generic_state->frame_height_in_mbs;
/* Per-slice-type setup of weighting denominators, reference counts and
 * rounding, honoring the slice-level active-override flag. */
9900 if (slice_type == SLICE_TYPE_I) {
9901 luma_log2_weight_denom = 0;
9902 chroma_log2_weight_denom = 0;
9903 } else if (slice_type == SLICE_TYPE_P) {
9904 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
9905 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9906 rounding_inter_enable = avc_state->rounding_inter_enable;
9907 rounding_value = avc_state->rounding_value;
9909 if (slice_param->num_ref_idx_active_override_flag)
9910 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9911 } else if (slice_type == SLICE_TYPE_B) {
9912 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
9913 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9914 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
9915 rounding_inter_enable = avc_state->rounding_inter_enable;
9916 rounding_value = avc_state->rounding_value;
9918 if (slice_param->num_ref_idx_active_override_flag) {
9919 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9920 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
9923 if (weighted_pred_idc == 2) {
9924 /* 8.4.3 - Derivation process for prediction weights (8-279) */
9925 luma_log2_weight_denom = 5;
9926 chroma_log2_weight_denom = 5;
/* RC counters are active only on re-pack passes; RC panic only on the last
 * pass of a non-CQP encode without explicit min/max QP control. */
9935 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
9936 rc_panic_enable = (avc_state->rc_panic_enable &&
9937 (!avc_state->min_max_qp_enable) &&
9938 (encoder_context->rate_control_mode != VA_RC_CQP) &&
9939 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
9941 for (i = 0; i < 6; i++)
9944 BEGIN_BCS_BATCH(batch, 11);
9946 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
9947 OUT_BCS_BATCH(batch, slice_type);
9948 OUT_BCS_BATCH(batch,
9949 (num_ref_l1 << 24) |
9950 (num_ref_l0 << 16) |
9951 (chroma_log2_weight_denom << 8) |
9952 (luma_log2_weight_denom << 0));
9953 OUT_BCS_BATCH(batch,
9954 (weighted_pred_idc << 30) |
9955 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
9956 (slice_param->disable_deblocking_filter_idc << 27) |
9957 (slice_param->cabac_init_idc << 24) |
9959 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
9960 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
9962 OUT_BCS_BATCH(batch,
9963 slice_ver_pos << 24 |
9964 slice_hor_pos << 16 |
9965 slice_param->macroblock_address);
9966 OUT_BCS_BATCH(batch,
9967 next_slice_ver_pos << 16 |
9968 next_slice_hor_pos);
9970 OUT_BCS_BATCH(batch,
9971 (rate_control_counter_enable << 31) |
9972 (1 << 30) | /* ResetRateControlCounter */
9973 (2 << 28) | /* Loose Rate Control */
9974 (0 << 24) | /* RC Stable Tolerance */
9975 (rc_panic_enable << 23) | /* RC Panic Enable */
9976 (1 << 22) | /* CBP mode */
9977 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
9978 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
9979 (!next_slice_param << 19) | /* Is Last Slice */
9980 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
9981 (1 << 17) | /* HeaderPresentFlag */
9982 (1 << 16) | /* SliceData PresentFlag */
9983 (0 << 15) | /* TailPresentFlag */
9984 (1 << 13) | /* RBSP NAL TYPE */
9985 (1 << 12)); /* CabacZeroWordInsertionEnable */
/* DW7: byte offset in the PAK-BSE buffer where this slice's bits start. */
9987 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
9989 OUT_BCS_BATCH(batch,
9990 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
9991 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
9994 OUT_BCS_BATCH(batch,
9995 (rounding_inter_enable << 31) |
9996 (rounding_value << 28) |
9999 (correct[5] << 20) |
10000 (correct[4] << 16) |
10001 (correct[3] << 12) |
10002 (correct[2] << 8) |
10003 (correct[1] << 4) |
10004 (correct[0] << 0));
10005 OUT_BCS_BATCH(batch, 0);
10007 ADVANCE_BCS_BATCH(batch);
/* Pack one reference-picture entry byte for the MFX_AVC_REF_IDX_STATE command.
 *
 * Bit layout produced by the visible shifts:
 *   bit 6    : long-term reference flag
 *   bits 5:1 : frame store id of the reference in the DPB
 *   bit 0    : set only for a bottom-field-only picture ((top ^ 1) & bottom)
 *
 * NOTE(review): this extract has elided lines (gaps in the residual original
 * numbering), so braces and possibly other statements are missing from view.
 */
10011 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10013 unsigned int is_long_term =
10014 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10015 unsigned int is_top_field =
10016 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10017 unsigned int is_bottom_field =
10018 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
/* Fold the three flags and the frame store id into a single entry value. */
10020 return ((is_long_term << 6) |
10022 (frame_store_id << 1) |
10023 ((is_top_field ^ 1) & is_bottom_field));
/* Emit MFX_AVC_REF_IDX_STATE command(s) for the current slice.
 *
 * For P and B slices an L0 command is emitted; B slices additionally get an
 * L1 command. Each command carries packed reference entries built by
 * gen9_mfc_avc_get_ref_idx_state(); 0x80808080 is the "invalid/unused entry"
 * fill pattern for the remaining DWORDs. At most 4 references per list are
 * packed here (see the loop bounds and the comment below).
 */
10027 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10028 struct encode_state *encode_state,
10029 struct intel_encoder_context *encoder_context,
10030 VAEncSliceParameterBufferH264 *slice_param,
10031 struct intel_batchbuffer *batch)
10033 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10034 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10035 VAPictureH264 *ref_pic;
10036 int i, slice_type, ref_idx_shift;
10037 unsigned int fwd_ref_entry;
10038 unsigned int bwd_ref_entry;
10040 /* max 4 ref frames are allowed for l0 and l1 */
10041 fwd_ref_entry = 0x80808080;
10042 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Build the packed L0 entry DWORD: one byte per reference, low byte first. */
10044 if ((slice_type == SLICE_TYPE_P) ||
10045 (slice_type == SLICE_TYPE_B)) {
10046 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10047 ref_pic = &slice_param->RefPicList0[i];
10048 ref_idx_shift = i * 8;
/* Clear the placeholder byte before adding the real entry. */
10050 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10051 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
/* Same packing for the L1 list, used only by B slices. */
10055 bwd_ref_entry = 0x80808080;
10056 if (slice_type == SLICE_TYPE_B) {
10057 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10058 ref_pic = &slice_param->RefPicList1[i];
10059 ref_idx_shift = i * 8;
10061 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10062 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
/* Emit the L0 command (list selector 0) for P/B slices. */
10066 if ((slice_type == SLICE_TYPE_P) ||
10067 (slice_type == SLICE_TYPE_B)) {
10068 BEGIN_BCS_BATCH(batch, 10);
10069 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10070 OUT_BCS_BATCH(batch, 0); // L0
10071 OUT_BCS_BATCH(batch, fwd_ref_entry);
/* Remaining 7 DWORDs are padded with the invalid-entry pattern. */
10073 for (i = 0; i < 7; i++) {
10074 OUT_BCS_BATCH(batch, 0x80808080);
10077 ADVANCE_BCS_BATCH(batch);
/* Emit the L1 command (list selector 1) for B slices. */
10080 if (slice_type == SLICE_TYPE_B) {
10081 BEGIN_BCS_BATCH(batch, 10);
10082 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10083 OUT_BCS_BATCH(batch, 1); //Select L1
10084 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
10085 for (i = 0; i < 7; i++) {
10086 OUT_BCS_BATCH(batch, 0x80808080);
10088 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_WEIGHTOFFSET_STATE command(s) for explicit weighted prediction.
 *
 * Only applies when the picture parameters request it:
 *   - P slice with weighted_pred_flag == 1  -> one command for list L0
 *   - B slice with weighted_bipred_idc == 1 -> two commands, L0 then L1
 *
 * Each table holds 32 entries of 6 shorts:
 *   [luma weight, luma offset, chroma-Cb weight, chroma-Cb offset,
 *    chroma-Cr weight, chroma-Cr offset]
 *
 * NOTE(review): declarations of `slice_type` and `i` are on lines elided from
 * this extract (gaps in the residual numbering).
 */
10093 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10094 struct encode_state *encode_state,
10095 struct intel_encoder_context *encoder_context,
10096 VAEncPictureParameterBufferH264 *pic_param,
10097 VAEncSliceParameterBufferH264 *slice_param,
10098 struct intel_batchbuffer *batch)
10101 short weightoffsets[32 * 6];
10103 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* P slice: explicit weighted prediction, L0 table only. */
10105 if (slice_type == SLICE_TYPE_P &&
10106 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10107 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10108 for (i = 0; i < 32; i++) {
10109 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10110 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10111 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10112 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10113 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10114 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
/* 98 DWORDs total: header + list selector + 96 DWORDs of table data. */
10117 BEGIN_BCS_BATCH(batch, 98);
10118 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10119 OUT_BCS_BATCH(batch, 0);
10120 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10122 ADVANCE_BCS_BATCH(batch);
/* B slice with explicit bi-prediction weights: emit L0 table ... */
10125 if (slice_type == SLICE_TYPE_B &&
10126 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10127 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10128 for (i = 0; i < 32; i++) {
10129 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10130 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10131 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10132 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10133 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10134 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10137 BEGIN_BCS_BATCH(batch, 98);
10138 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10139 OUT_BCS_BATCH(batch, 0);
10140 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10141 ADVANCE_BCS_BATCH(batch);
/* ... then the L1 table (list selector 1). */
10143 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10144 for (i = 0; i < 32; i++) {
10145 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10146 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10147 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10148 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10149 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10150 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10153 BEGIN_BCS_BATCH(batch, 98);
10154 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10155 OUT_BCS_BATCH(batch, 1);
10156 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10157 ADVANCE_BCS_BATCH(batch);
/* Program one slice for PAK.
 *
 * On the first PAK pass the per-slice commands (ref idx state, weight/offset
 * state, slice state, packed headers) are recorded once into a reusable
 * second-level batch buffer and terminated with MI_BATCH_BUFFER_END; later
 * passes replay the recorded commands from the saved offset. The main batch
 * then chains two second-level batch-buffer starts: one into the slice
 * command buffer and one into the per-surface MB code buffer produced by the
 * VME/ENC stage.
 *
 * NOTE(review): the parameter list and parts of the body are elided in this
 * extract (e.g. the `slice_index` parameter is used below but its declaring
 * line is missing from view).
 */
10162 gen9_mfc_avc_single_slice(VADriverContextP ctx,
10163 struct encode_state *encode_state,
10164 struct intel_encoder_context *encoder_context,
10165 VAEncSliceParameterBufferH264 *slice_param,
10166 VAEncSliceParameterBufferH264 *next_slice_param,
10169 struct i965_driver_data *i965 = i965_driver_data(ctx);
10170 struct i965_gpe_table *gpe = &i965->gpe_table;
10171 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10172 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10173 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10174 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10175 struct intel_batchbuffer *batch = encoder_context->base.batch;
10176 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
10177 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
10178 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10179 struct object_surface *obj_surface;
10180 struct gen9_surface_avc *avc_priv_surface;
10182 unsigned int slice_offset = 0;
/* First pass: record this slice's commands and remember their offset so
 * subsequent passes can replay them without re-recording. */
10184 if (generic_state->curr_pak_pass == 0) {
10185 slice_offset = intel_batchbuffer_used_size(slice_batch);
10186 avc_state->slice_batch_offset[slice_index] = slice_offset;
10187 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
10188 gen9_mfc_avc_weightoffset_state(ctx,
10194 gen9_mfc_avc_slice_state(ctx,
10201 gen9_mfc_avc_inset_headers(ctx,
/* Terminate the recorded second-level sequence. */
10208 BEGIN_BCS_BATCH(slice_batch, 2);
10209 OUT_BCS_BATCH(slice_batch, 0);
10210 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
10211 ADVANCE_BCS_BATCH(slice_batch);
/* Re-pass: reuse the offset captured on pass 0. */
10214 slice_offset = avc_state->slice_batch_offset[slice_index];
10216 /* insert slice as second level.*/
10217 memset(&second_level_batch, 0, sizeof(second_level_batch));
10218 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10219 second_level_batch.offset = slice_offset;
10220 second_level_batch.bo = slice_batch->buffer;
10221 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10223 /* insert mb code as second level.*/
10224 obj_surface = encode_state->reconstructed_object;
10225 assert(obj_surface->private_data);
10226 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10228 memset(&second_level_batch, 0, sizeof(second_level_batch));
10229 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
/* MB code records are 16 DWORDs (64 bytes) per macroblock, so the slice's
 * data starts at macroblock_address * 16 * 4 bytes. */
10230 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
10231 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
10232 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Slice-level PAK programming: iterate all slice parameter buffers/elements
 * and program each slice via gen9_mfc_avc_single_slice(), then flush with a
 * video-pipeline cache invalidate.
 *
 * NOTE(review): several lines are elided in this extract (loop variable
 * declarations, the argument list passed to gen9_mfc_avc_single_slice(), and
 * the bodies of the `is_frame_level` / `has_tail` conditionals), so their
 * exact behavior cannot be confirmed from here.
 */
10237 gen9_avc_pak_slice_level(VADriverContextP ctx,
10238 struct encode_state *encode_state,
10239 struct intel_encoder_context *encoder_context)
10241 struct i965_driver_data *i965 = i965_driver_data(ctx);
10242 struct i965_gpe_table *gpe = &i965->gpe_table;
10243 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10244 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10245 struct intel_batchbuffer *batch = encoder_context->base.batch;
10246 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10247 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10249 int slice_index = 0;
10250 int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
10251 int has_tail = 0; /* check it later */
/* Outer loop: one iteration per slice parameter buffer (slice group). */
10253 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10254 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
/* Look ahead to the next group so each slice knows its successor. */
10256 if (j == encode_state->num_slice_params_ext - 1)
10257 next_slice_group_param = NULL;
10259 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
/* Inner loop: one iteration per slice element within the buffer. */
10261 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10262 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10263 next_slice_param = slice_param + 1;
10265 next_slice_param = next_slice_group_param;
10267 gen9_mfc_avc_single_slice(ctx,
10276 if (is_frame_level)
10280 if (is_frame_level)
10285 /* insert a tail if required */
10288 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10289 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10290 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Picture-level PAK programming.
 *
 * For BRC re-passes (pass > 0) a conditional batch-buffer end is emitted
 * first: it compares against the image status mask stored by the previous
 * pass so the re-encode is skipped when the prior pass already succeeded.
 * Then the MFX pipeline is programmed (pipe mode select, surface states for
 * reconstructed and raw input, buffer address states), the per-pass image
 * state is inserted as a second-level batch (from the BRC-updated read buffer
 * when BRC is on, otherwise from a freshly generated non-BRC image state),
 * and finally QM/FQM and direct-mode states are emitted.
 */
10293 gen9_avc_pak_picture_level(VADriverContextP ctx,
10294 struct encode_state *encode_state,
10295 struct intel_encoder_context *encoder_context)
10297 struct i965_driver_data *i965 = i965_driver_data(ctx);
10298 struct i965_gpe_table *gpe = &i965->gpe_table;
10299 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10300 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10301 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10302 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10303 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10304 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* BRC multi-pass: bail out of this pass early if the previous pass's
 * image status indicates no re-encode is needed. */
10306 if (generic_state->brc_enabled &&
10307 generic_state->curr_pak_pass) {
10308 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
10309 struct encoder_status_buffer_internal *status_buffer;
10310 status_buffer = &(avc_ctx->status_buffer);
10312 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
10313 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
10314 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
10315 mi_conditional_batch_buffer_end_params.compare_data = 0;
10316 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
10317 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
/* Core MFX pipeline state. Binding indices 0 and 4 select the
 * reconstructed and uncompressed-input surface slots respectively. */
10320 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
10321 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
10322 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
10323 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
10324 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
10325 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
/* Image state: BRC path reads the per-pass image state written back by the
 * BRC kernel; each pass has its own INTEL_AVC_IMAGE_STATE_CMD_SIZE slot. */
10327 if (generic_state->brc_enabled) {
10328 memset(&second_level_batch, 0, sizeof(second_level_batch));
10329 if (generic_state->curr_pak_pass == 0) {
10330 second_level_batch.offset = 0;
10332 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
10334 second_level_batch.is_second_level = 1;
10335 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
10336 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10338 /*generate a new image state */
10339 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
10340 memset(&second_level_batch, 0, sizeof(second_level_batch));
10341 second_level_batch.offset = 0;
10342 second_level_batch.is_second_level = 1;
10343 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
10344 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Quantization matrices and direct-mode buffers. */
10347 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
10348 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
10349 gen9_mfc_avc_directmode_state(ctx, encoder_context);
/* Capture MFC status registers after PAK.
 *
 * After a flush, MMIO registers are stored into two destinations:
 *   1. the internal status buffer (bitstream byte count, image status mask)
 *      used by the app-visible coded-buffer status query, and
 *   2. the BRC pre-PAK statistics buffer (byte counts at DW0/DW1, the pass
 *      count at DW2, and the per-pass image status control at DW4+pass),
 *      which the BRC kernel reads to decide on re-encodes.
 * A final flush ensures the stores land before anything reads them.
 */
10354 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10356 struct i965_driver_data *i965 = i965_driver_data(ctx);
10357 struct i965_gpe_table *gpe = &i965->gpe_table;
10358 struct intel_batchbuffer *batch = encoder_context->base.batch;
10359 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10360 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10361 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10363 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
10364 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
10365 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
10366 struct encoder_status_buffer_internal *status_buffer;
10368 status_buffer = &(avc_ctx->status_buffer);
/* Flush so PAK results are complete before reading registers. */
10370 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10371 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10373 /* read register and store into status_buffer and pak_statitistic info */
10374 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
10375 mi_store_reg_mem_param.bo = status_buffer->bo;
10376 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
10377 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10378 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10380 mi_store_reg_mem_param.bo = status_buffer->bo;
10381 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
10382 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
10383 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10385 /*update the status in the pak_statistic_surface */
10386 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10387 mi_store_reg_mem_param.offset = 0;
10388 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10389 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10391 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10392 mi_store_reg_mem_param.offset = 4;
10393 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
10394 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* DW2 of the statistics buffer holds the number of completed passes. */
10396 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
10397 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10398 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
10399 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
10400 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
/* Per-pass image status control lands at DW(4 + pass). */
10402 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10403 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
10404 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10405 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10407 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10408 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Map the VA rate-control mode to the encoder's internal rate mode and apply
 * the default quality level when the app left it unset.
 *
 * NOTE(review): the switch's case labels are on lines elided from this
 * extract; from the residual numbering the visible assignments correspond to
 * the CBR, VBR (also covering AVBR, per the inline comment) and default/CQP
 * arms respectively — confirm against the full source.
 */
10414 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10415 struct intel_encoder_context *encoder_context)
10417 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10418 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10419 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Mask off modifier bits before dispatching on the base RC mode. */
10421 switch (rate_control_mode & 0x7f) {
10423 generic_state->internal_rate_mode = VA_RC_CBR;
10427 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10432 generic_state->internal_rate_mode = VA_RC_CQP;
/* Quality level 0 means "unspecified": substitute the AVC default. */
10436 if (encoder_context->quality_level == 0)
10437 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10440 /* allocate resources for pak only (fei mode) */
/* Prepare the PAK-only (FEI) pipeline for one frame.
 *
 * Responsibilities, in order:
 *   - refresh/validate encode parameters and allocate PAK resources
 *   - wrap the app's coded buffer as the compressed-bitstream GPE resource
 *     and reuse it as the status buffer
 *   - reset the coded-buffer segment header (coded size unknown)
 *   - derive num_refs / list_ref_idx from picture+slice parameters and the
 *     reference_objects list
 *   - validate the reconstructed surface and bind the externally supplied
 *     FEI MB-code buffer (mandatory in PAK-only mode) and the optional
 *     MV-data buffer to the surface's private GPE resources
 *
 * NOTE(review): several lines are elided in this extract (error-return
 * statements after the VA-status checks, dri_bo map/unmap around the
 * coded-buffer writes, loop braces), so control flow between the visible
 * statements is partly inferred — verify against the full source.
 */
10442 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10444 struct encode_state *encode_state,
10445 struct intel_encoder_context *encoder_context)
10447 VAStatus va_status;
10448 struct i965_driver_data *i965 = i965_driver_data(ctx);
10449 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10450 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10451 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10452 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10453 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10454 struct gen9_surface_avc *avc_priv_surface;
10455 VAEncPictureParameterBufferH264 *pic_param;
10456 VAEncSliceParameterBufferH264 *slice_param;
10457 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10458 unsigned int size = 0, i, j;
10459 unsigned int frame_mb_nums;
10460 struct object_buffer *obj_buffer = NULL;
10461 struct buffer_store *buffer_store = NULL;
10462 struct object_surface *obj_surface = NULL;
10463 struct avc_surface_param surface_param;
10464 struct i965_coded_buffer_segment *coded_buffer_segment;
10466 unsigned char * pdata;
10468 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10470 pic_param = avc_state->pic_param;
10471 slice_param = avc_state->slice_param[0];
10473 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10474 if (va_status != VA_STATUS_SUCCESS)
10477 va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10478 if (va_status != VA_STATUS_SUCCESS)
10481 /* Encoded bitstream ?*/
10482 obj_buffer = encode_state->coded_buf_object;
10483 bo = obj_buffer->buffer_store->bo;
10484 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10485 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10486 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10487 generic_ctx->compressed_bitstream.end_offset =
10488 ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* The coded buffer doubles as the status buffer. */
10491 dri_bo_unreference(avc_ctx->status_buffer.bo);
10492 avc_ctx->status_buffer.bo = bo;
10493 dri_bo_reference(bo);
10495 /* set the internal flag to 0 to indicate the coded size is unknown */
10497 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10498 coded_buffer_segment->mapped = 0;
10499 coded_buffer_segment->codec = encoder_context->codec;
10500 coded_buffer_segment->status_support = 1;
10502 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10503 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10505 //frame id, it is the ref pic id in the reference_objects list.
/* Derive active reference counts; slice-level override wins over picture
 * level when num_ref_idx_active_override_flag is set. */
10506 avc_state->num_refs[0] = 0;
10507 avc_state->num_refs[1] = 0;
10508 if (generic_state->frame_type == SLICE_TYPE_P) {
10509 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10511 if (slice_param->num_ref_idx_active_override_flag)
10512 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10513 } else if (generic_state->frame_type == SLICE_TYPE_B) {
10514 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10515 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10517 if (slice_param->num_ref_idx_active_override_flag) {
10518 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10519 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Resolve each L0 entry's surface id to its index (frame store id) in
 * encode_state->reference_objects. */
10522 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10523 VAPictureH264 *va_pic;
10525 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10526 avc_state->list_ref_idx[0][i] = 0;
10528 if (i >= avc_state->num_refs[0])
10531 va_pic = &slice_param->RefPicList0[i];
10533 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10534 obj_surface = encode_state->reference_objects[j];
10538 obj_surface->base.id == va_pic->picture_id) {
10540 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10541 avc_state->list_ref_idx[0][i] = j;
/* Same resolution for the L1 list. */
10547 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10548 VAPictureH264 *va_pic;
10550 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10551 avc_state->list_ref_idx[1][i] = 0;
10553 if (i >= avc_state->num_refs[1])
10556 va_pic = &slice_param->RefPicList1[i];
10558 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10559 obj_surface = encode_state->reference_objects[j];
10564 obj_surface->base.id == va_pic->picture_id) {
10566 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10567 avc_state->list_ref_idx[1][i] = j;
10575 obj_surface = encode_state->reconstructed_object;
10576 fei_param = avc_state->fei_framectl_param;
10577 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10579 /* Setup current reconstruct frame */
10580 obj_surface = encode_state->reconstructed_object;
10581 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10583 if (va_status != VA_STATUS_SUCCESS)
10586 memset(&surface_param, 0, sizeof(surface_param));
10587 surface_param.frame_width = generic_state->frame_width_in_pixel;
10588 surface_param.frame_height = generic_state->frame_height_in_pixel;
10589 va_status = gen9_avc_init_check_surfaces(ctx,
10590 obj_surface, encoder_context,
10592 avc_priv_surface = obj_surface->private_data;
10594 /* res_mb_code_surface for MB code */
10595 /* PAK only mode must have the mb_code_surface from middleware,
10596 * so the code shouldn't reach here without an externally provided
10597 * MB Code buffer */
10598 assert(fei_param->mb_code_data != VA_INVALID_ID);
10599 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10600 obj_buffer = BUFFER(fei_param->mb_code_data);
10601 assert(obj_buffer != NULL);
10602 buffer_store = obj_buffer->buffer_store;
10603 assert(size <= buffer_store->bo->size);
10604 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10605 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10606 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10608 /* res_mv_data_surface for MV data */
10609 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10610 if (fei_param->mv_data != VA_INVALID_ID) {
10611 obj_buffer = BUFFER(fei_param->mv_data);
10612 assert(obj_buffer != NULL);
10613 buffer_store = obj_buffer->buffer_store;
10614 assert(size <= buffer_store->bo->size);
10615 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10616 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10617 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10621 return VA_STATUS_SUCCESS;
/* Prepare the PAK pipeline for one frame (non-FEI and FEI entry point).
 *
 * Responsibilities, in order:
 *   - delegate to the FEI PAK-only prepare when fei_function_mode requests it
 *   - scan all slices to decide whether the in-loop deblocking filter is
 *     enabled (any slice with disable_deblocking_filter_idc != 1)
 *   - validate the reconstructed surface and (re)bind its DMV buffers, POC
 *     values and per-surface bookkeeping
 *   - bind reconstructed/post-/pre-deblocking and raw-input GPE resources
 *   - rebind all reference surfaces, their DMV buffers and POCs
 *   - recreate the second-level slice batch buffer and reset slice offsets
 *   - allocate the PAK row-store / MB-status scratch buffers
 *
 * NOTE(review): elided lines (gaps in the residual numbering) hide the early
 * returns after VA-status checks, some buffer-size expressions passed to
 * i965_allocate_gpe_resource(), and closing braces — control flow between
 * the visible statements is partly inferred.
 */
10626 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
10628 struct encode_state *encode_state,
10629 struct intel_encoder_context *encoder_context)
10631 VAStatus va_status;
10632 struct i965_driver_data *i965 = i965_driver_data(ctx);
10633 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10634 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10635 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10636 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10637 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10639 struct object_surface *obj_surface;
10640 VAEncPictureParameterBufferH264 *pic_param;
10641 VAEncSliceParameterBufferH264 *slice_param;
10643 struct gen9_surface_avc *avc_priv_surface;
10644 struct avc_surface_param surface_param;
10645 int i, j, enable_avc_ildb = 0;
10646 unsigned int allocate_flag = 1;
10647 unsigned int size, w_mb, h_mb;
/* PAK-only FEI mode has its own, reduced prepare path. */
10649 if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
10650 va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10651 if (va_status != VA_STATUS_SUCCESS)
10655 pic_param = avc_state->pic_param;
10656 slice_param = avc_state->slice_param[0];
10657 w_mb = generic_state->frame_width_in_mbs;
10658 h_mb = generic_state->frame_height_in_mbs;
10660 /* update the parameter and check slice parameter */
/* Deblocking is enabled for the frame as soon as any slice does not fully
 * disable it (idc != 1); the scan stops early once that is known. */
10661 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
10662 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
10663 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10665 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10666 assert((slice_param->slice_type == SLICE_TYPE_I) ||
10667 (slice_param->slice_type == SLICE_TYPE_SI) ||
10668 (slice_param->slice_type == SLICE_TYPE_P) ||
10669 (slice_param->slice_type == SLICE_TYPE_SP) ||
10670 (slice_param->slice_type == SLICE_TYPE_B));
10672 if (slice_param->disable_deblocking_filter_idc != 1) {
10673 enable_avc_ildb = 1;
10680 avc_state->enable_avc_ildb = enable_avc_ildb;
10682 /* setup the all surface and buffer for PAK */
10683 /* Setup current reconstruct frame */
10684 obj_surface = encode_state->reconstructed_object;
10685 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10687 if (va_status != VA_STATUS_SUCCESS)
10690 memset(&surface_param, 0, sizeof(surface_param));
10691 surface_param.frame_width = generic_state->frame_width_in_pixel;
10692 surface_param.frame_height = generic_state->frame_height_in_pixel;
10693 va_status = gen9_avc_init_check_surfaces(ctx,
10694 obj_surface, encoder_context,
10696 if (va_status != VA_STATUS_SUCCESS)
10698 /* init the member of avc_priv_surface,frame_store_id,qp_value */
/* The current frame's DMV buffers occupy the last two slots of the DMV
 * buffer array; references use slots 2*i / 2*i+1 below. */
10700 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10701 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
10702 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
10703 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
10704 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
10705 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
10706 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
10707 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
10708 avc_priv_surface->frame_store_id = 0;
10709 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
10710 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
10711 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
10712 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
10713 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
10715 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10716 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10717 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10718 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
/* Route the reconstructed surface to post- or pre-deblocking output
 * depending on whether the in-loop filter runs. */
10721 if (avc_state->enable_avc_ildb) {
10722 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10724 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10726 /* input YUV surface */
10727 obj_surface = encode_state->input_yuv_object;
10728 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10730 if (va_status != VA_STATUS_SUCCESS)
10732 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10733 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
10735 /* Reference surfaces */
10736 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
10737 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10738 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
10739 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
10740 obj_surface = encode_state->reference_objects[i];
10741 avc_state->top_field_poc[2 * i] = 0;
10742 avc_state->top_field_poc[2 * i + 1] = 0;
10744 if (obj_surface && obj_surface->bo) {
10745 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
10747 /* actually it should be handled when it is reconstructed surface */
10748 va_status = gen9_avc_init_check_surfaces(ctx,
10749 obj_surface, encoder_context,
10751 if (va_status != VA_STATUS_SUCCESS)
10753 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10754 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
10755 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
10756 avc_priv_surface->frame_store_id = i;
10757 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
10758 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
/* Second-level slice batch buffer is recreated each frame, sized by the
 * number of slice parameter buffers. */
10764 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10765 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10766 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10769 avc_ctx->pres_slice_batch_buffer_2nd_level =
10770 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
10772 encode_state->num_slice_params_ext);
10773 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
10774 return VA_STATUS_ERROR_ALLOCATION_FAILED;
10776 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
10777 avc_state->slice_batch_offset[i] = 0;
/* PAK scratch buffers; any allocation failure bails to the common path. */
10782 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10783 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10784 &avc_ctx->res_intra_row_store_scratch_buffer,
10786 "PAK Intra row store scratch buffer");
10787 if (!allocate_flag)
10788 goto failed_allocation;
10790 size = w_mb * 4 * 64;
10791 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10792 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10793 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
10795 "PAK Deblocking filter row store scratch buffer");
10796 if (!allocate_flag)
10797 goto failed_allocation;
10799 size = w_mb * 2 * 64;
10800 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10801 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10802 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
10804 "PAK BSD/MPC row store scratch buffer");
10805 if (!allocate_flag)
10806 goto failed_allocation;
10808 size = w_mb * h_mb * 16;
10809 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10810 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10811 &avc_ctx->res_pak_mb_status_buffer,
10813 "PAK MB status buffer");
10814 if (!allocate_flag)
10815 goto failed_allocation;
10817 return VA_STATUS_SUCCESS;
10820 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * gen9_avc_encode_picture:
 * Drive the AVC PAK (bitstream packing) stage for one frame: prepare the
 * PAK resources, then run num_pak_passes picture+slice level PAK passes on
 * the BSD ring, reading back the MFC status registers after each pass.
 * Returns VA_STATUS_SUCCESS on success, or the error from the prepare step.
 * NOTE(review): this excerpt is elided - e.g. the error return after the
 * va_status check and several closing braces are not visible here.
 */
10824 gen9_avc_encode_picture(VADriverContextP ctx,
10826 struct encode_state *encode_state,
10827 struct intel_encoder_context *encoder_context)
10829 VAStatus va_status;
10830 struct i965_driver_data *i965 = i965_driver_data(ctx);
10831 struct i965_gpe_table *gpe = &i965->gpe_table;
10832 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10833 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
10834 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
10835 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* (Re)allocate/refresh all per-frame PAK surfaces and buffers. */
10837 va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10839 if (va_status != VA_STATUS_SUCCESS)
/* On parts with a second BSD ring, pin this batch to BSD ring 0. */
10842 if (i965->intel.has_bsd2)
10843 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
10845 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
10846 intel_batchbuffer_emit_mi_flush(batch);
/* Multi-pass PAK loop; num_pak_passes is set at context init. */
10847 for (generic_state->curr_pak_pass = 0;
10848 generic_state->curr_pak_pass < generic_state->num_pak_passes;
10849 generic_state->curr_pak_pass++) {
10851 if (generic_state->curr_pak_pass == 0) {
10852 /* First pass only: write 0 to the AVC image status/control register via MI_LOAD_REGISTER_IMM. Original author was unsure this is required for AVC. */
10853 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
10854 struct encoder_status_buffer_internal *status_buffer;
10856 status_buffer = &(avc_ctx->status_buffer);
10857 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
10858 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10859 mi_load_reg_imm.data = 0;
10860 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
/* Emit picture-level then slice-level PAK commands, then capture the MFC status registers for this pass. */
10862 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
10863 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
10864 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The second-level slice batch is per-frame; release it now. */
10867 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10868 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10869 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10872 intel_batchbuffer_end_atomic(batch);
10873 intel_batchbuffer_flush(batch);
/* Bookkeeping for the next frame in the sequence. */
10875 generic_state->seq_frame_number++;
10876 generic_state->total_frame_number++;
10877 generic_state->first_frame = 0;
10878 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_pak_pipeline:
 * PAK pipeline entry point registered with the encoder context. Dispatches
 * on the VA profile: all supported H.264 profiles go to
 * gen9_avc_encode_picture(); anything else yields
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the switch header, local declarations and return are elided
 * in this excerpt.
 */
10882 gen9_avc_pak_pipeline(VADriverContextP ctx,
10884 struct encode_state *encode_state,
10885 struct intel_encoder_context *encoder_context)
10890 case VAProfileH264ConstrainedBaseline:
10891 case VAProfileH264Main:
10892 case VAProfileH264High:
10893 case VAProfileH264MultiviewHigh:
10894 case VAProfileH264StereoHigh:
10895 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10899 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * gen9_avc_pak_context_destroy:
 * Release every GPE resource owned by the PAK side of the shared VME/PAK
 * context: reconstructed/deblocking/input surfaces, the compressed
 * bitstream, the row-store scratch buffers, the MB status buffer, all
 * reference surfaces and direct-MV buffers, and the second-level slice
 * batch buffer. The context structures themselves are freed by the VME
 * destroy path (VME and PAK share one context).
 */
10907 gen9_avc_pak_context_destroy(void * context)
10909 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10910 struct generic_encoder_context * generic_ctx;
10911 struct i965_avc_encoder_context * avc_ctx;
10917 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10918 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Per-frame surfaces shared with the PAK pipeline. */
10921 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10922 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10923 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10924 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
/* Output bitstream and PAK scratch/status buffers. */
10926 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10927 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10928 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10929 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10930 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
/* Reference picture surfaces and their direct-MV buffers. */
10932 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10933 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10936 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10937 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
/* Second-level slice batch buffer, if one is still alive. */
10940 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10941 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10942 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * gen9_avc_get_coded_status:
 * Report the encoded frame size back to the application. The MFC status
 * words (including bs_byte_count_frame, captured from the
 * MFC_BITSTREAM_BYTECOUNT_FRAME register during PAK) live in the coded
 * buffer segment's private area; copy the frame byte count into
 * base.size so the caller sees the real bitstream length.
 * Returns VA_STATUS_ERROR_INVALID_BUFFER if either argument is NULL.
 */
10948 gen9_avc_get_coded_status(VADriverContextP ctx,
10949 struct intel_encoder_context *encoder_context,
10950 struct i965_coded_buffer_segment *coded_buf_seg)
10952 struct encoder_status *avc_encode_status;
10954 if (!encoder_context || !coded_buf_seg)
10955 return VA_STATUS_ERROR_INVALID_BUFFER;
/* Status area layout is defined by struct encoder_status (see context init). */
10957 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10958 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10960 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_context_init:
 * Allocate and wire up the shared VME/PAK encoder context for AVC:
 *  - allocates the five context/state structures,
 *  - selects the per-generation kernel binary,
 *  - sets the generic and AVC-specific encoder state defaults,
 *  - defines the per-coded-buffer status area offsets and the matching
 *    MFC MMIO register offsets,
 *  - loads the kernels and installs the VME pipeline callbacks.
 * NOTE(review): interior lines (some else branches, braces, frees on the
 * error path, final return) are elided in this excerpt.
 */
10964 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10966 /* VME & PAK share the same context */
10967 struct i965_driver_data *i965 = i965_driver_data(ctx);
10968 struct encoder_vme_mfc_context * vme_context = NULL;
10969 struct generic_encoder_context * generic_ctx = NULL;
10970 struct i965_avc_encoder_context * avc_ctx = NULL;
10971 struct generic_enc_codec_state * generic_state = NULL;
10972 struct avc_enc_state * avc_state = NULL;
10973 struct encoder_status_buffer_internal *status_buffer;
/* Offset of the private status words inside a coded buffer segment. */
10974 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10976 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10977 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10978 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10979 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10980 avc_state = calloc(1, sizeof(struct avc_enc_state));
10982 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10983 goto allocate_structure_failed;
/* NOTE(review): these memsets are redundant - calloc already zeroed the
 * allocations above. Harmless, but could be dropped. */
10985 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10986 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10987 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10988 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10989 memset(avc_state, 0, sizeof(struct avc_enc_state));
/* Link the sub-contexts into the shared VME/PAK container. */
10991 encoder_context->vme_context = vme_context;
10992 vme_context->generic_enc_ctx = generic_ctx;
10993 vme_context->private_enc_ctx = avc_ctx;
10994 vme_context->generic_enc_state = generic_state;
10995 vme_context->private_enc_state = avc_state;
/* Select the media-kernel binary for this GPU generation. */
10997 if (IS_SKL(i965->intel.device_info) ||
10998 IS_BXT(i965->intel.device_info)) {
10999 if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11000 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11001 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11003 /* FEI and PreEnc operation kernels are included in
11004 * the monolithic kernel binary */
11005 generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11006 generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11008 } else if (IS_GEN8(i965->intel.device_info)) {
11009 generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11010 generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11011 } else if (IS_KBL(i965->intel.device_info) ||
11012 IS_GLK(i965->intel.device_info)) {
11013 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11014 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11015 } else if (IS_GEN10(i965->intel.device_info)) {
11016 generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11017 generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
/* Unsupported generation: bail out through the common error path. */
11019 goto allocate_structure_failed;
11021 /* initialize misc ? */
11022 avc_ctx->ctx = ctx;
11023 generic_ctx->use_hw_scoreboard = 1;
11024 generic_ctx->use_hw_non_stalling_scoreboard = 1;
11026 /* initialize generic state */
11028 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11029 generic_state->preset = INTEL_PRESET_RT_SPEED;
11030 generic_state->seq_frame_number = 0;
11031 generic_state->total_frame_number = 0;
11032 generic_state->frame_type = 0;
11033 generic_state->first_frame = 1;
/* Frame geometry - filled in later from the sequence parameters. */
11035 generic_state->frame_width_in_pixel = 0;
11036 generic_state->frame_height_in_pixel = 0;
11037 generic_state->frame_width_in_mbs = 0;
11038 generic_state->frame_height_in_mbs = 0;
11039 generic_state->frame_width_4x = 0;
11040 generic_state->frame_height_4x = 0;
11041 generic_state->frame_width_16x = 0;
11042 generic_state->frame_height_16x = 0;
11043 generic_state->frame_width_32x = 0;
11044 generic_state->downscaled_width_4x_in_mb = 0;
11045 generic_state->downscaled_height_4x_in_mb = 0;
11046 generic_state->downscaled_width_16x_in_mb = 0;
11047 generic_state->downscaled_height_16x_in_mb = 0;
11048 generic_state->downscaled_width_32x_in_mb = 0;
11049 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capability defaults: 4x and 16x supported, 32x off. */
11051 generic_state->hme_supported = 1;
11052 generic_state->b16xme_supported = 1;
11053 generic_state->b32xme_supported = 0;
11054 generic_state->hme_enabled = 0;
11055 generic_state->b16xme_enabled = 0;
11056 generic_state->b32xme_enabled = 0;
11058 if (encoder_context->fei_enabled) {
11059 /* Disabling HME in FEI encode */
11060 generic_state->hme_supported = 0;
11061 generic_state->b16xme_supported = 0;
11062 } else if (encoder_context->preenc_enabled) {
11063 /* Disabling 16x16ME in PreEnc */
11064 generic_state->b16xme_supported = 0;
11067 generic_state->brc_distortion_buffer_supported = 1;
11068 generic_state->brc_constant_buffer_supported = 0;
/* BRC (bit-rate control) defaults; actual values come from the app. */
11070 generic_state->frame_rate = 30;
11071 generic_state->brc_allocated = 0;
11072 generic_state->brc_inited = 0;
11073 generic_state->brc_need_reset = 0;
11074 generic_state->is_low_delay = 0;
11075 generic_state->brc_enabled = 0;//default
11076 generic_state->internal_rate_mode = 0;
11077 generic_state->curr_pak_pass = 0;
11078 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11079 generic_state->is_first_pass = 1;
11080 generic_state->is_last_pass = 0;
11081 generic_state->mb_brc_enabled = 0; // enable mb brc
11082 generic_state->brc_roi_enable = 0;
11083 generic_state->brc_dirty_roi_enable = 0;
11084 generic_state->skip_frame_enbale = 0;
11086 generic_state->target_bit_rate = 0;
11087 generic_state->max_bit_rate = 0;
11088 generic_state->min_bit_rate = 0;
11089 generic_state->init_vbv_buffer_fullness_in_bit = 0;
11090 generic_state->vbv_buffer_size_in_bit = 0;
11091 generic_state->frames_per_100s = 0;
11092 generic_state->gop_size = 0;
11093 generic_state->gop_ref_distance = 0;
11094 generic_state->brc_target_size = 0;
11095 generic_state->brc_mode = 0;
11096 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11097 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11098 generic_state->brc_init_reset_buf_size_in_bits = 0;
11099 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11100 generic_state->frames_per_window_size = 0;//default
11101 generic_state->target_percentage = 0;
11103 generic_state->avbr_curracy = 0;
11104 generic_state->avbr_convergence = 0;
11106 generic_state->num_skip_frames = 0;
11107 generic_state->size_skip_frames = 0;
11109 generic_state->num_roi = 0;
11110 generic_state->max_delta_qp = 0;
11111 generic_state->min_delta_qp = 0;
/* Any RC mode other than NONE/CQP turns BRC on. */
11113 if (encoder_context->rate_control_mode != VA_RC_NONE &&
11114 encoder_context->rate_control_mode != VA_RC_CQP) {
11115 generic_state->brc_enabled = 1;
11116 generic_state->brc_distortion_buffer_supported = 1;
11117 generic_state->brc_constant_buffer_supported = 1;
11118 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11120 /*avc state initialization */
11121 avc_state->mad_enable = 0;
11122 avc_state->mb_disable_skip_map_enable = 0;
11123 avc_state->sfd_enable = 1;//default
11124 avc_state->sfd_mb_enable = 1;//set it true
11125 avc_state->adaptive_search_window_enable = 1;//default
11126 avc_state->mb_qp_data_enable = 0;
11127 avc_state->intra_refresh_i_enable = 0;
11128 avc_state->min_max_qp_enable = 0;
11129 avc_state->skip_bias_adjustment_enable = 0;//default,same as skip_bias_adjustment_supporte? no
11132 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11133 avc_state->ftq_skip_threshold_lut_input_enable = 0;
11134 avc_state->ftq_override = 0;
11136 avc_state->direct_bias_adjustment_enable = 0;
11137 avc_state->global_motion_bias_adjustment_enable = 0;
11138 avc_state->disable_sub_mb_partion = 0;
11139 avc_state->arbitrary_num_mbs_in_slice = 0;
11140 avc_state->adaptive_transform_decision_enable = 0;//default
11141 avc_state->skip_check_disable = 0;
11142 avc_state->tq_enable = 0;
11143 avc_state->enable_avc_ildb = 0;
11144 avc_state->mbaff_flag = 0;
11145 avc_state->enable_force_skip = 1;//default
11146 avc_state->rc_panic_enable = 1;//default
11147 avc_state->suppress_recon_enable = 1;//default
11149 avc_state->ref_pic_select_list_supported = 1;
11150 avc_state->mb_brc_supported = 1;//?,default
11151 avc_state->multi_pre_enable = 1;//default
11152 avc_state->ftq_enable = 1;//default
11153 avc_state->caf_supported = 1; //default
11154 avc_state->caf_enable = 0;
11155 avc_state->caf_disable_hd = 1;//default
11156 avc_state->skip_bias_adjustment_supported = 1;//default
11158 avc_state->adaptive_intra_scaling_enable = 1;//default
11159 avc_state->old_mode_cost_enable = 0;//default
11160 avc_state->multi_ref_qp_enable = 1;//default
11161 avc_state->weighted_ref_l0_enable = 1;//default
11162 avc_state->weighted_ref_l1_enable = 1;//default
11163 avc_state->weighted_prediction_supported = 0;
11164 avc_state->brc_split_enable = 0;
11165 avc_state->slice_level_report_supported = 0;
11167 avc_state->fbr_bypass_enable = 1;//default
11168 avc_state->field_scaling_output_interleaved = 0;
11169 avc_state->mb_variance_output_enable = 0;
11170 avc_state->mb_pixel_average_output_enable = 0;
11171 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11172 avc_state->mbenc_curbe_set_in_brc_update = 0;
11173 avc_state->rounding_inter_enable = 1; //default
11174 avc_state->adaptive_rounding_inter_enable = 1;//default
11176 avc_state->mbenc_i_frame_dist_in_use = 0;
11177 avc_state->mb_status_supported = 1; //set in intialization for gen9
11178 avc_state->mb_status_enable = 0;
11179 avc_state->mb_vproc_stats_enable = 0;
11180 avc_state->flatness_check_enable = 0;
11181 avc_state->flatness_check_supported = 1;//default
11182 avc_state->block_based_skip_enable = 0;
11183 avc_state->use_widi_mbenc_kernel = 0;
11184 avc_state->kernel_trellis_enable = 0;
11185 avc_state->generic_reserved = 0;
/* Rounding values start "invalid" so later code can detect "unset". */
11187 avc_state->rounding_value = 0;
11188 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11189 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11190 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11191 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11192 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11193 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11194 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11195 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11196 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11198 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11199 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11200 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11202 avc_state->intra_refresh_qp_threshold = 0;
11203 avc_state->trellis_flag = 0;
11204 avc_state->hme_mv_cost_scaling_factor = 0;
11205 avc_state->slice_height = 1;
11206 avc_state->slice_num = 1;
11207 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11208 avc_state->bi_weight = 0;
11210 avc_state->lambda_table_enable = 0;
/* Per-generation overrides of the defaults set above. */
11212 if (IS_GEN8(i965->intel.device_info)) {
11213 avc_state->brc_const_data_surface_width = 64;
11214 avc_state->brc_const_data_surface_height = 44;
11215 avc_state->mb_status_supported = 0;
11216 } else if (IS_SKL(i965->intel.device_info) ||
11217 IS_BXT(i965->intel.device_info)) {
11218 avc_state->brc_const_data_surface_width = 64;
11219 avc_state->brc_const_data_surface_height = 44;
11220 avc_state->brc_split_enable = 1;
11221 } else if (IS_KBL(i965->intel.device_info) ||
11222 IS_GEN10(i965->intel.device_info) ||
11223 IS_GLK(i965->intel.device_info)) {
11224 avc_state->brc_const_data_surface_width = 64;
11225 avc_state->brc_const_data_surface_height = 53;
11227 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11228 avc_state->extended_mv_cost_range_enable = 0;
11229 avc_state->reserved_g95 = 0;
11230 avc_state->mbenc_brc_buffer_size = 128;
11231 avc_state->kernel_trellis_enable = 1;
11232 avc_state->lambda_table_enable = 1;
11233 avc_state->brc_split_enable = 1;
11234 avc_state->adaptive_transform_decision_enable = 1;// CNL
11237 avc_state->num_refs[0] = 0;
11238 avc_state->num_refs[1] = 0;
11239 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11240 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11241 avc_state->tq_rounding = 0;
11242 avc_state->zero_mv_threshold = 0;
11243 avc_state->slice_second_levle_batch_buffer_in_use = 0;
11247 /* the definition of status buffer offset for Encoder */
11249 status_buffer = &avc_ctx->status_buffer;
11250 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
/* Byte offsets of each status word inside the coded buffer segment. */
11252 status_buffer->base_offset = base_offset;
11253 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11254 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11255 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11256 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11257 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11258 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
/* MFC MMIO registers each status word is captured from after PAK. */
11260 status_buffer->status_buffer_size = sizeof(struct encoder_status);
11261 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11262 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11263 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11264 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11265 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load the selected kernel binary into GPE contexts. */
11267 if (IS_GEN8(i965->intel.device_info)) {
11268 gen8_avc_kernel_init(ctx, encoder_context);
11270 gen9_avc_kernel_init(ctx, encoder_context);
/* Install the VME callbacks; PreEnc gets its own pipeline entry. */
11272 encoder_context->vme_context = vme_context;
11273 /* Handling PreEnc operations separately since it gives better
11274 * code readability, avoid possible vme operations mess-up */
11275 encoder_context->vme_pipeline =
11276 !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11277 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* Error path: free whatever was allocated (remaining frees elided here). */
11281 allocate_structure_failed:
11286 free(generic_state);
11292 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11294 /* VME & PAK share the same context */
11295 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11300 encoder_context->mfc_context = pak_context;
11301 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11302 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11303 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11304 encoder_context->get_status = gen9_avc_get_coded_status;