 * Copyright (c) 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
26 * Sreerenj Balachandran <sreerenj.balachandran@intel.com>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
55 #define MAX_URB_SIZE 4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT 1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
58 #define GPE_RESOURCE_ALIGNMENT 4 /* 4 means 16 = 1 << 4) */
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
62 OUT_BCS_RELOC64(batch, \
64 I915_GEM_DOMAIN_INSTRUCTION, \
65 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
68 OUT_BCS_BATCH(batch, 0); \
69 OUT_BCS_BATCH(batch, 0); \
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
74 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
75 OUT_BCS_BATCH(batch, attr); \
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE 64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE 128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE 16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE 48
84 #define FEI_AVC_QP_BUFFER_SIZE 1
85 #define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
87 #define SCALE_CUR_PIC 1
88 #define SCALE_PAST_REF_PIC 2
89 #define SCALE_FUTURE_REF_PIC 3
91 static const uint32_t qm_flat[16] = {
92 0x10101010, 0x10101010, 0x10101010, 0x10101010,
93 0x10101010, 0x10101010, 0x10101010, 0x10101010,
94 0x10101010, 0x10101010, 0x10101010, 0x10101010,
95 0x10101010, 0x10101010, 0x10101010, 0x10101010
98 static const uint32_t fqm_flat[32] = {
99 0x10001000, 0x10001000, 0x10001000, 0x10001000,
100 0x10001000, 0x10001000, 0x10001000, 0x10001000,
101 0x10001000, 0x10001000, 0x10001000, 0x10001000,
102 0x10001000, 0x10001000, 0x10001000, 0x10001000,
103 0x10001000, 0x10001000, 0x10001000, 0x10001000,
104 0x10001000, 0x10001000, 0x10001000, 0x10001000,
105 0x10001000, 0x10001000, 0x10001000, 0x10001000,
106 0x10001000, 0x10001000, 0x10001000, 0x10001000
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559 struct encode_state *encode_state,
560 struct intel_encoder_context *encoder_context)
562 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
567 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
569 generic_state->brc_need_reset = encoder_context->brc.need_reset;
571 if (generic_state->internal_rate_mode == VA_RC_CBR) {
572 generic_state->min_bit_rate = generic_state->max_bit_rate;
573 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
575 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576 generic_state->target_bit_rate = generic_state->max_bit_rate;
577 generic_state->brc_need_reset = 1;
579 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
583 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585 generic_state->brc_need_reset = 1;
590 if (generic_state->internal_rate_mode != VA_RC_CQP) {
591 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
595 generic_state->frames_per_100s = 30 * 100;
596 generic_state->frame_rate = 30 ;
597 generic_state->frames_per_window_size = 30;
601 if (generic_state->internal_rate_mode != VA_RC_CQP) {
602 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
607 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608 if (generic_state->num_roi > 0) {
609 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
612 for (i = 0; i < generic_state->num_roi; i++) {
613 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
614 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
615 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
616 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
619 generic_state->roi[i].left /= 16;
620 generic_state->roi[i].right /= 16;
621 generic_state->roi[i].top /= 16;
622 generic_state->roi[i].bottom /= 16;
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
631 INTEL_GENERIC_ENC_OPERATION operation,
633 struct i965_kernel *ret_kernel)
635 typedef uint32_t BIN_PTR[4];
638 gen9_avc_encoder_kernel_header *pkh_table;
639 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
642 if (!pvbinary || !ret_kernel)
645 bin_start = (char *)pvbinary;
646 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647 pinvalid_entry = &(pkh_table->static_detection) + 1;
648 next_krnoffset = binary_size;
650 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651 pcurr_header = &pkh_table->ply_dscale_ply;
652 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653 pcurr_header = &pkh_table->ply_2xdscale_ply;
654 } else if (operation == INTEL_GENERIC_ENC_ME) {
655 pcurr_header = &pkh_table->me_p;
656 } else if (operation == INTEL_GENERIC_ENC_BRC) {
657 pcurr_header = &pkh_table->frame_brc_init;
658 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659 pcurr_header = &pkh_table->mbenc_quality_I;
660 } else if (operation == INTEL_GENERIC_ENC_WP) {
661 pcurr_header = &pkh_table->wp;
662 } else if (operation == INTEL_GENERIC_ENC_SFD) {
663 pcurr_header = &pkh_table->static_detection;
668 pcurr_header += krnstate_idx;
669 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
671 pnext_header = (pcurr_header + 1);
672 if (pnext_header < pinvalid_entry) {
673 next_krnoffset = pnext_header->kernel_start_pointer << 6;
675 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
681 intel_avc_fei_get_kernel_header_and_size(
684 INTEL_GENERIC_ENC_OPERATION operation,
686 struct i965_kernel *ret_kernel)
688 typedef uint32_t BIN_PTR[4];
691 gen9_avc_fei_encoder_kernel_header *pkh_table;
692 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
695 if (!pvbinary || !ret_kernel)
698 bin_start = (char *)pvbinary;
699 pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700 pinvalid_entry = &(pkh_table->wp) + 1;
701 next_krnoffset = binary_size;
703 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704 pcurr_header = &pkh_table->ply_dscale_ply;
705 } else if (operation == INTEL_GENERIC_ENC_ME) {
706 pcurr_header = &pkh_table->me_p;
707 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708 pcurr_header = &pkh_table->mbenc_i;
709 } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710 pcurr_header = &pkh_table->preproc;
715 pcurr_header += krnstate_idx;
716 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
718 pnext_header = (pcurr_header + 1);
719 if (pnext_header < pinvalid_entry) {
720 next_krnoffset = pnext_header->kernel_start_pointer << 6;
722 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
728 gen9_free_surfaces_avc(void **data)
730 struct gen9_surface_avc *avc_surface;
737 if (avc_surface->scaled_4x_surface_obj) {
738 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740 avc_surface->scaled_4x_surface_obj = NULL;
743 if (avc_surface->scaled_16x_surface_obj) {
744 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746 avc_surface->scaled_16x_surface_obj = NULL;
749 if (avc_surface->scaled_32x_surface_obj) {
750 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752 avc_surface->scaled_32x_surface_obj = NULL;
755 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
759 /* FEI specific resources */
760 /* since the driver previously taken an extra reference to the drm_bo
761 * in case the buffers were supplied by middleware, there shouldn't
762 * be any memory handling issue */
763 i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764 i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765 i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766 i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
768 dri_bo_unreference(avc_surface->dmv_top);
769 avc_surface->dmv_top = NULL;
770 dri_bo_unreference(avc_surface->dmv_bottom);
771 avc_surface->dmv_bottom = NULL;
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782 struct object_surface *obj_surface,
783 struct intel_encoder_context *encoder_context,
784 struct avc_surface_param *surface_param)
786 struct i965_driver_data *i965 = i965_driver_data(ctx);
787 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
791 struct gen9_surface_avc *avc_surface;
792 int downscaled_width_4x, downscaled_height_4x;
793 int downscaled_width_16x, downscaled_height_16x;
794 int downscaled_width_32x, downscaled_height_32x;
796 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799 int allocate_flag = 1;
802 if (!obj_surface || !obj_surface->bo)
803 return VA_STATUS_ERROR_INVALID_SURFACE;
805 if (obj_surface->private_data) {
806 return VA_STATUS_SUCCESS;
809 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
812 return VA_STATUS_ERROR_ALLOCATION_FAILED;
814 avc_surface->ctx = ctx;
815 obj_surface->private_data = avc_surface;
816 obj_surface->free_private_data = gen9_free_surfaces_avc;
818 downscaled_width_4x = generic_state->frame_width_4x;
819 downscaled_height_4x = generic_state->frame_height_4x;
821 i965_CreateSurfaces(ctx,
823 downscaled_height_4x,
826 &avc_surface->scaled_4x_surface_id);
828 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
830 if (!avc_surface->scaled_4x_surface_obj) {
831 return VA_STATUS_ERROR_ALLOCATION_FAILED;
834 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
837 downscaled_width_16x = generic_state->frame_width_16x;
838 downscaled_height_16x = generic_state->frame_height_16x;
839 i965_CreateSurfaces(ctx,
840 downscaled_width_16x,
841 downscaled_height_16x,
844 &avc_surface->scaled_16x_surface_id);
845 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
847 if (!avc_surface->scaled_16x_surface_obj) {
848 return VA_STATUS_ERROR_ALLOCATION_FAILED;
851 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
854 if (generic_state->b32xme_supported ||
855 generic_state->b32xme_enabled) {
856 downscaled_width_32x = generic_state->frame_width_32x;
857 downscaled_height_32x = generic_state->frame_height_32x;
858 i965_CreateSurfaces(ctx,
859 downscaled_width_32x,
860 downscaled_height_32x,
863 &avc_surface->scaled_32x_surface_id);
864 avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
866 if (!avc_surface->scaled_32x_surface_obj) {
867 return VA_STATUS_ERROR_ALLOCATION_FAILED;
870 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
874 /*mb code and mv data for each frame*/
875 if (!encoder_context->fei_enabled) {
876 size = frame_mb_nums * 16 * 4;
877 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878 &avc_surface->res_mb_code_surface,
882 goto failed_allocation;
884 size = frame_mb_nums * 32 * 4;
885 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886 &avc_surface->res_mv_data_surface,
890 goto failed_allocation;
894 if (avc_state->ref_pic_select_list_supported) {
895 width = ALIGN(frame_width_in_mbs * 8, 64);
896 height = frame_height_in_mbs ;
897 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898 &avc_surface->res_ref_pic_select_surface,
901 "Ref pic select list buffer");
903 goto failed_allocation;
907 avc_surface->dmv_top =
908 dri_bo_alloc(i965->intel.bufmgr,
909 "direct mv top Buffer",
912 avc_surface->dmv_bottom =
913 dri_bo_alloc(i965->intel.bufmgr,
914 "direct mv bottom Buffer",
917 assert(avc_surface->dmv_top);
918 assert(avc_surface->dmv_bottom);
920 return VA_STATUS_SUCCESS;
923 return VA_STATUS_ERROR_ALLOCATION_FAILED;
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928 struct encode_state *encode_state,
929 struct intel_encoder_context *encoder_context)
931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
936 struct i965_gpe_resource *gpe_resource = NULL;
937 VAEncSliceParameterBufferH264 * slice_param = NULL;
938 unsigned int * data = NULL;
939 unsigned int * data_row = NULL;
941 unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
943 if (!avc_state->arbitrary_num_mbs_in_slice)
946 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947 assert(gpe_resource);
949 i965_zero_gpe_resource(gpe_resource);
951 data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
955 for (i = 0; i < avc_state->slice_num; i++) {
956 slice_param = avc_state->slice_param[i];
957 for (j = 0; j < slice_param->num_macroblocks; j++) {
959 if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
967 *data++ = 0xFFFFFFFF;
969 i965_unmap_gpe_resource(gpe_resource);
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974 struct encode_state *encode_state,
975 struct intel_encoder_context *encoder_context)
977 struct i965_driver_data *i965 = i965_driver_data(ctx);
978 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982 unsigned int size = 0;
983 unsigned int width = 0;
984 unsigned int height = 0;
985 unsigned char * data = NULL;
986 int allocate_flag = 1;
989 /*all the surface/buffer are allocated here*/
991 /*second level batch buffer for image state write when cqp etc*/
992 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995 &avc_ctx->res_image_state_batch_buffer_2nd_level,
997 "second levle batch (image state write) buffer");
999 goto failed_allocation;
1001 /* scaling related surface */
1002 if (avc_state->mb_status_supported) {
1003 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006 &avc_ctx->res_mb_status_buffer,
1007 ALIGN(size, 0x1000),
1008 "MB statistics output buffer");
1010 goto failed_allocation;
1011 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1014 if (avc_state->flatness_check_supported) {
1015 width = generic_state->frame_width_in_mbs * 4;
1016 height = generic_state->frame_height_in_mbs * 4;
1017 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019 &avc_ctx->res_flatness_check_surface,
1022 "Flatness check buffer");
1024 goto failed_allocation;
1026 /* me related surface */
1027 width = generic_state->downscaled_width_4x_in_mb * 8;
1028 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031 &avc_ctx->s4x_memv_distortion_buffer,
1034 "4x MEMV distortion buffer");
1036 goto failed_allocation;
1037 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1039 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043 &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1046 "4x MEMV min distortion brc buffer");
1048 goto failed_allocation;
1049 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1052 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056 &avc_ctx->s4x_memv_data_buffer,
1059 "4x MEMV data buffer");
1061 goto failed_allocation;
1062 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1065 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069 &avc_ctx->s16x_memv_data_buffer,
1072 "16x MEMV data buffer");
1074 goto failed_allocation;
1075 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1078 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082 &avc_ctx->s32x_memv_data_buffer,
1085 "32x MEMV data buffer");
1087 goto failed_allocation;
1088 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1091 if (!generic_state->brc_allocated) {
1092 /*brc related surface */
1093 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1095 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096 &avc_ctx->res_brc_history_buffer,
1097 ALIGN(size, 0x1000),
1098 "brc history buffer");
1100 goto failed_allocation;
1102 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1104 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106 ALIGN(size, 0x1000),
1107 "brc pak statistic buffer");
1109 goto failed_allocation;
1111 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114 &avc_ctx->res_brc_image_state_read_buffer,
1115 ALIGN(size, 0x1000),
1116 "brc image state read buffer");
1118 goto failed_allocation;
1120 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123 &avc_ctx->res_brc_image_state_write_buffer,
1124 ALIGN(size, 0x1000),
1125 "brc image state write buffer");
1127 goto failed_allocation;
1129 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130 height = avc_state->brc_const_data_surface_height;
1131 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133 &avc_ctx->res_brc_const_data_buffer,
1136 "brc const data buffer");
1138 goto failed_allocation;
1139 i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1141 if (generic_state->brc_distortion_buffer_supported) {
1142 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148 &avc_ctx->res_brc_dist_data_surface,
1151 "brc dist data buffer");
1153 goto failed_allocation;
1154 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1157 if (generic_state->brc_roi_enable) {
1158 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162 &avc_ctx->res_mbbrc_roi_surface,
1165 "mbbrc roi buffer");
1167 goto failed_allocation;
1168 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1172 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176 &avc_ctx->res_mbbrc_mb_qp_data_surface,
1179 "mbbrc mb qp buffer");
1181 goto failed_allocation;
1183 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184 size = 16 * AVC_QP_MAX * 4;
1185 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186 &avc_ctx->res_mbbrc_const_data_buffer,
1187 ALIGN(size, 0x1000),
1188 "mbbrc const data buffer");
1190 goto failed_allocation;
1192 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194 size = avc_state->mbenc_brc_buffer_size;
1195 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196 &avc_ctx->res_mbenc_brc_buffer,
1197 ALIGN(size, 0x1000),
1198 "mbenc brc buffer");
1200 goto failed_allocation;
1201 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1203 generic_state->brc_allocated = 1;
1207 if (avc_state->mb_qp_data_enable) {
1208 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212 &avc_ctx->res_mb_qp_data_surface,
1215 "external mb qp buffer");
1217 goto failed_allocation;
1220 /* mbenc related surface. it share most of surface with other kernels */
1221 if (avc_state->arbitrary_num_mbs_in_slice) {
1222 width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223 height = generic_state->frame_height_in_mbs ;
1224 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226 &avc_ctx->res_mbenc_slice_map_surface,
1229 "slice map buffer");
1231 goto failed_allocation;
1232 i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1234 /*generate slice map,default one slice per frame.*/
1237 /* sfd related surface */
1238 if (avc_state->sfd_enable) {
1239 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1241 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242 &avc_ctx->res_sfd_output_buffer,
1244 "sfd output buffer");
1246 goto failed_allocation;
1247 i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1249 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250 size = ALIGN(52, 64);
1251 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252 &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1254 "sfd P frame cost table buffer");
1256 goto failed_allocation;
1257 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1259 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1262 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263 size = ALIGN(52, 64);
1264 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265 &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1267 "sfd B frame cost table buffer");
1269 goto failed_allocation;
1270 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1272 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1276 /* wp related surfaces */
1277 if (avc_state->weighted_prediction_supported) {
1278 for (i = 0; i < 2 ; i++) {
1279 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1283 width = generic_state->frame_width_in_pixel;
1284 height = generic_state->frame_height_in_pixel ;
1285 i965_CreateSurfaces(ctx,
1288 VA_RT_FORMAT_YUV420,
1290 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1293 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294 goto failed_allocation;
1297 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1300 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1308 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1310 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311 &avc_ctx->res_mad_data_buffer,
1312 ALIGN(size, 0x1000),
1315 goto failed_allocation;
1317 return VA_STATUS_SUCCESS;
1320 return VA_STATUS_ERROR_ALLOCATION_FAILED;
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1329 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330 VADriverContextP ctx = avc_ctx->ctx;
1333 /* free all the surface/buffer here*/
1334 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1361 for (i = 0; i < 2 ; i++) {
1362 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363 i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364 avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365 avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1369 /* free preenc resources */
1370 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1375 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1378 i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379 avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380 avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1382 i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383 avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384 avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1386 i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387 avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388 avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
/*
 * Dispatch one kernel with a single MEDIA_OBJECT command.
 *
 * The sequence is emitted atomically into the encoder batchbuffer:
 * MI_FLUSH, then an MI_STORE_DATA_IMM that records the current
 * media_function id at status_buffer->media_index_offset (so the status
 * buffer reflects which media state was last submitted), then the GPE
 * pipeline setup / MEDIA_OBJECT / media state flush, and finally the
 * batchbuffer is flushed to the kernel driver.
 */
static void
gen9_avc_run_kernel_media_object(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 struct i965_gpe_context *gpe_context,
                                 int media_function,
                                 struct gpe_media_object_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* tag the status buffer with the media state id being run */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
/*
 * Dispatch one kernel with a MEDIA_OBJECT_WALKER command.
 *
 * Identical structure to gen9_avc_run_kernel_media_object() except that
 * the hardware walker generates the per-block threads from the walker
 * parameters instead of a single MEDIA_OBJECT.  The media_function id is
 * stored into the status buffer before dispatch, and the whole sequence
 * is emitted atomically and flushed.
 */
static void
gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        struct i965_gpe_context *gpe_context,
                                        int media_function,
                                        struct gpe_media_object_walker_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* tag the status buffer with the media state id being run */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object_walker(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
/*
 * Initialize the sizing parameters of a GPE context for one AVC encoder
 * kernel: CURBE length, optional sampler, interface descriptor table,
 * surface-state/binding-table layout and the VFE (thread/URB) state, all
 * derived from the per-kernel sizes in kernel_param.
 */
static void
gen9_init_gpe_context_avc(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct encoder_kernel_parameter *kernel_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    /* no sampler unless the kernel asks for one */
    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;
    }

    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* binding table (4 bytes per entry) first, then the padded surface states */
    gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* scale the thread count with the EU count when it is known */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
    else
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* URB space left after CURBE and the interface descriptors is split
     * into URB entries; NOTE(review): the driver's CLAMP macro takes
     * (min, max, value), so this clamps to the range [1, 127] */
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
}
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514 struct encoder_scoreboard_parameter *scoreboard_param)
1516 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1520 if (scoreboard_param->walkpat_flag) {
1521 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522 gpe_context->vfe_desc5.scoreboard0.type = 1;
1524 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1525 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1527 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1528 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1530 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1531 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1533 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1534 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1537 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1538 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1541 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1542 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1545 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1546 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1549 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1550 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1553 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1554 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1557 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1558 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1561 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1562 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1565 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1566 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1570 VME pipeline related function
1574 scaling kernel related function
/*
 * Fill the CURBE for the Gen9 4x downscaling kernel (also reused for the
 * 16x stage): frame dimensions, input/output binding-table indices and
 * the optional per-MB statistics outputs (flatness / variance / pixel
 * average).  `param` must point to a struct scaling_param.
 */
static void
gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct i965_gpe_context *gpe_context,
                             struct intel_encoder_context *encoder_context,
                             void *param)
{
    gen9_avc_scaling4x_curbe_data *curbe_cmd;
    struct scaling_param *surface_param = (struct scaling_param *)param;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!curbe_cmd)
        return;

    memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));

    curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
    curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;

    curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
    curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;

    if (surface_param->enable_mb_flatness_check)
        curbe_cmd->dw5.flatness_threshold = 128;
    curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
    curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
    curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;

    /* the statistics surface BTI is only needed if any per-MB output is on */
    if (curbe_cmd->dw6.enable_mb_flatness_check ||
        curbe_cmd->dw7.enable_mb_variance_output ||
        curbe_cmd->dw8.enable_mb_pixel_average_output) {
        curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
    }

    i965_gpe_context_unmap_curbe(gpe_context);
}
/*
 * gen95 variant of the 4x scaling CURBE setup.  Differs from the gen9
 * version in the CURBE layout: all statistics enables live in DW6, the
 * statistics BTI is DW8, and 8x8-block statistics output is additionally
 * supported.  `param` must point to a struct scaling_param.
 */
static void
gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              void *param)
{
    gen95_avc_scaling4x_curbe_data *curbe_cmd;
    struct scaling_param *surface_param = (struct scaling_param *)param;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!curbe_cmd)
        return;

    memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));

    curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
    curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;

    curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
    curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;

    if (surface_param->enable_mb_flatness_check)
        curbe_cmd->dw5.flatness_threshold = 128;
    curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
    curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
    curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
    curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;

    /* the statistics surface BTI is only needed if any per-MB output is on */
    if (curbe_cmd->dw6.enable_mb_flatness_check ||
        curbe_cmd->dw6.enable_mb_variance_output ||
        curbe_cmd->dw6.enable_mb_pixel_average_output) {
        curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
    }

    i965_gpe_context_unmap_curbe(gpe_context);
}
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct i965_gpe_context *gpe_context,
1659 struct intel_encoder_context *encoder_context,
1662 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663 struct scaling_param *surface_param = (struct scaling_param *)param;
1665 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1670 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1672 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1673 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1675 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1678 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the input/output surfaces (and the optional per-MB statistics
 * output) for a scaling-kernel dispatch.  `param` must point to a struct
 * scaling_param; the surface format is picked from its
 * scaling_out_use_*_surf_fmt flags.  Gen8 uses dedicated binding-table
 * slots for the statistics and flatness outputs, while Gen9+ shares a
 * single slot for either the MBV statistics buffer or the
 * flatness-check surface.
 */
static void
gen9_avc_send_surface_scaling(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              void *param)
{
    struct scaling_param *surface_param = (struct scaling_param *)param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    unsigned int surface_format;
    unsigned int res_size;

    if (surface_param->scaling_out_use_32unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R32_UNORM;
    else if (surface_param->scaling_out_use_16unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R16_UNORM;
    else
        surface_format = I965_SURFACEFORMAT_R8_UNORM;

    i965_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->input_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);

    i965_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->output_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);

    /*add buffer mv_proc_stat, here need change*/
    if (IS_GEN8(i965->intel.device_info)) {
        if (surface_param->mbv_proc_stat_enabled) {
            /* 16 dwords of statistics per 16x16 macroblock */
            res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);

            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        surface_param->pres_mbv_proc_stat_buffer,
                                        0,
                                        res_size / 4,
                                        0,
                                        GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
        }
        if (surface_param->enable_mb_flatness_check) {
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           surface_param->pres_flatness_check_surface,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN8_SCALING_FRAME_FLATNESS_DST_CM);
        }
    } else {
        if (surface_param->mbv_proc_stat_enabled) {
            res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);

            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        surface_param->pres_mbv_proc_stat_buffer,
                                        0,
                                        res_size / 4,
                                        0,
                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
        } else if (surface_param->enable_mb_flatness_check) {
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           surface_param->pres_flatness_check_surface,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
        }
    }
}
/*
 * Run one HME downscaling pass (4x, 16x or 32x) for the current frame.
 *
 * The 4x pass scales the input YUV to the 4x surface and may also emit
 * per-MB statistics (flatness / variance / pixel average) consumed by
 * later kernels; the 16x pass reuses the same 4x kernel on the already
 * 4x-scaled surface; the 32x pass runs the dedicated 2x kernel on the
 * 16x surface.  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_scaling(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int hme_type)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    struct i965_gpe_context *gpe_context;
    struct scaling_param surface_param;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;

    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;

    /* pick source/destination surfaces and sizes for the requested stage */
    memset(&surface_param, 0, sizeof(struct scaling_param));
    switch (hme_type) {
    case INTEL_ENC_HME_4x : {
        media_function = INTEL_MEDIA_STATE_4X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;

        surface_param.input_surface = encode_state->input_yuv_object ;
        surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
        surface_param.input_frame_height = generic_state->frame_height_in_pixel ;

        surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_4x ;
        surface_param.output_frame_height = generic_state->frame_height_4x ;

        /* MB statistics are only produced on the 4x pass */
        surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
        surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
        surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 1 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 0 ;
        break;
    }
    case INTEL_ENC_HME_16x : {
        media_function = INTEL_MEDIA_STATE_16X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_4x ;
        surface_param.input_frame_height = generic_state->frame_height_4x ;

        surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_16x ;
        surface_param.output_frame_height = generic_state->frame_height_16x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 1 ;
        surface_param.use_32x_scaling = 0 ;
        break;
    }
    case INTEL_ENC_HME_32x : {
        media_function = INTEL_MEDIA_STATE_32X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_16x ;
        surface_param.input_frame_height = generic_state->frame_height_16x ;

        surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_32x ;
        surface_param.output_frame_height = generic_state->frame_height_32x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 1 ;
        break;
    }
    default :
        assert(0);
    }

    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    /* 32x uses the 2x kernel's CURBE layout, the others the 4x layout */
    if (surface_param.use_32x_scaling) {
        generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    } else {
        generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    }

    if (surface_param.use_32x_scaling) {
        surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
    } else {
        surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
    }

    if (surface_param.use_4x_scaling) {
        if (avc_state->mb_status_supported) {
            /* the MB status buffer replaces the separate flatness surface */
            surface_param.enable_mb_flatness_check = 0;
            surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
            surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
        } else {
            surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
            surface_param.mbv_proc_stat_enabled = 0 ;
            surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
        }
    }

    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* setup the interface data */
    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    if (surface_param.use_32x_scaling) {
        kernel_walker_param.resolution_x = downscaled_width_in_mb ;
        kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    } else {
        /* the scaling is based on 8x8 blk level */
        kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
        kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    }
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
1919 frame/mb brc related function
/*
 * Fill a Gen9 MFX_AVC_IMG_STATE command from the current sequence and
 * picture parameters.  Only progressive frame encoding is programmed
 * (image_structure/field flags are 0).  The per-pass fields
 * (macroblock_stat_enable, non_first_pass_flag) are set to first-pass
 * defaults here and fixed up per PAK pass by the set_image_state
 * helpers below.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* command header: type/pipeline/opcode identify MFX_AVC_IMG_STATE */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* trellis quantization (AQ) is only enabled together with CABAC */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set parameters DW19/DW20 for slices */
}
/*
 * Gen8 variant of the MFX_AVC_IMG_STATE initialization.  Same contract
 * as gen9_avc_init_mfx_avc_img_state() but uses the gen8 command layout
 * (inter/intra_mb_conf_flag instead of brc_domain_rate_control_enable,
 * no DW12 vad_error_logic).
 */
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* command header: type/pipeline/opcode identify MFX_AVC_IMG_STATE */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* trellis quantization (AQ) is only enabled together with CABAC */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set parameters DW19/DW20 for slices */
}
/*
 * Write one MFX_AVC_IMG_STATE command per PAK pass into gpe_resource
 * (multi-pass BRC path).  Each copy is INTEL_AVC_IMAGE_STATE_CMD_SIZE
 * bytes apart and is immediately followed by MI_BATCH_BUFFER_END.
 * Pass 0 keeps MB statistics off; later passes enable them and set the
 * non-first-pass flag.
 */
void gen9_avc_set_image_state(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;
    int i;
    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
    for (i = 0; i < generic_state->num_pak_passes; i++) {

        if (i == 0) {
            cmd.dw4.macroblock_stat_enable = 0;
            cmd.dw5.non_first_pass_flag = 0;
        } else {
            cmd.dw4.macroblock_stat_enable = 1;
            cmd.dw5.non_first_pass_flag = 1;
            cmd.dw5.intra_mb_ipcm_flag = 1;
        }

        cmd.dw5.mb_rate_ctrl_flag = 0;
        memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
        data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
        *data = MI_BATCH_BUFFER_END;

        pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
    }
    i965_unmap_gpe_resource(gpe_resource);
}
/*
 * Gen8 variant of gen9_avc_set_image_state(): writes one gen8
 * MFX_AVC_IMG_STATE per PAK pass followed by MI_BATCH_BUFFER_END.
 * Later passes additionally raise the gen8-only inter/intra MB
 * conformance flags.
 */
void gen8_avc_set_image_state(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;
    int i;
    unsigned int * data;
    struct gen8_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
    for (i = 0; i < generic_state->num_pak_passes; i++) {

        if (i == 0) {
            cmd.dw4.macroblock_stat_enable = 0;
            cmd.dw5.non_first_pass_flag = 0;
        } else {
            cmd.dw4.macroblock_stat_enable = 1;
            cmd.dw5.non_first_pass_flag = 1;
            cmd.dw5.intra_mb_ipcm_flag = 1;
            cmd.dw3.inter_mb_conf_flag = 1;
            cmd.dw3.intra_mb_conf_flag = 1;
        }

        cmd.dw5.mb_rate_ctrl_flag = 0;
        memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
        data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
        *data = MI_BATCH_BUFFER_END;

        pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
    }
    i965_unmap_gpe_resource(gpe_resource);
}
/*
 * Write a single MFX_AVC_IMG_STATE command for the current PAK pass into
 * gpe_resource (non-BRC path), terminated with MI_BATCH_BUFFER_END.
 * Unlike the BRC variant, non_first_pass_flag stays 0 on every pass;
 * only macroblock_stat_enable/intra_mb_ipcm_flag change after pass 0.
 */
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;
    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);

    if (generic_state->curr_pak_pass == 0) {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;
    } else {
        cmd.dw4.macroblock_stat_enable = 1;
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
}
/*
 * Build the Gen9.5 (KBL/GLK) trellis-quantization lambda table for the
 * current frame type: copy the per-slice-type constant table into
 * avc_state->lamda_value_lut, then patch the packed entries.
 *
 * Each 32-bit entry packs intra lambda in the high 16 bits and inter
 * lambda in the low 16 bits; sentinel values (0xfffa intra, 0xffef inter)
 * are replaced with 0xf000 + a rounding value chosen per frame type /
 * reference usage.
 *
 * NOTE(review): elided listing — closing braces and some case labels of
 * the switch are not visible; structure partly inferred.
 */
2221 gen95_avc_calc_lambda_table(VADriverContextP ctx,
2222                             struct encode_state *encode_state,
2223                             struct intel_encoder_context *encoder_context)
2225 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2226 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2227 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2228 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2229 unsigned int value, inter, intra;  /* packed entry and its two 16-bit halves */
2230 unsigned int rounding_value = 0;
2231 unsigned int size = 0;
2234 unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
2240 size = AVC_QP_MAX * 2 * sizeof(unsigned int);  /* AVC_QP_MAX rows x 2 columns of u32 */
2241 switch (generic_state->frame_type) {
2243 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
2246 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
2249 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
2256 for (i = 0; i < AVC_QP_MAX ; i++) {
2257 for (col = 0; col < 2; col++) {
2258 value = *(lambda_table + i * 2 + col);
2259 intra = value >> 16;  /* intra lambda lives in the upper half-word */
2261 if (intra < GEN95_AVC_MAX_LAMBDA) {
2262 if (intra == 0xfffa) {  /* sentinel: substitute default intra trellis rounding */
2263 intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
2267 intra = intra << 16;  /* re-pack into the upper half-word */
2268 inter = value & 0xffff;  /* inter lambda lives in the lower half-word */
2270 if (inter < GEN95_AVC_MAX_LAMBDA) {
2271 if (inter == 0xffef) {  /* sentinel: substitute per-frame-type inter rounding */
2272 if (generic_state->frame_type == SLICE_TYPE_P) {
2273 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
2274 rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];  /* preset default when app gave none */
2276 rounding_value = avc_state->rounding_inter_p;
2277 } else if (generic_state->frame_type == SLICE_TYPE_B) {
2278 if (pic_param->pic_fields.bits.reference_pic_flag) {  /* B frame used as reference gets its own rounding */
2279 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
2280 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
2282 rounding_value = avc_state->rounding_inter_b_ref;
2284 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
2285 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
2287 rounding_value = avc_state->rounding_inter_b;
2291 inter = 0xf000 + rounding_value;
2293 *(lambda_table + i * 2 + col) = intra + inter;  /* recombine the two half-words */
/*
 * Fill the BRC constant-data surface (res_brc_const_data_buffer) used by
 * the Gen9+ BRC kernels. The surface is written section by section, with
 * `data` advanced past each section (advance lines are elided here):
 *   1. QP adjustment / distortion threshold / max-frame threshold tables
 *   2. per-QP skip-value thresholds (P/B only, layout depends on
 *      block-based-skip and 8x8 transform flags)
 *   3. per-reference QP lists (L0/L1, 0xff-initialized)
 *   4. mode/MV cost table for the slice type
 *   5. optional FTQ skip-threshold overrides
 *   6. reference cost table
 *   7. intra scaling factors, plus lambda/FTQ25 tables on KBL/GLK/Gen10
 *
 * NOTE(review): elided listing — the `data += size` cursor advances,
 * several case labels, and closing braces are missing from view.
 */
2299 gen9_avc_init_brc_const_data(VADriverContextP ctx,
2300                              struct encode_state *encode_state,
2301                              struct intel_encoder_context *encoder_context)
2303 struct i965_driver_data *i965 = i965_driver_data(ctx);
2304 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2305 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2306 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2307 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2309 struct i965_gpe_resource *gpe_resource = NULL;
2310 unsigned char * data = NULL;  /* write cursor into the mapped surface */
2311 unsigned char * data_tmp = NULL;  /* secondary cursor for in-place patching */
2312 unsigned int size = 0;  /* size of the section currently being written */
2313 unsigned int table_idx = 0;  /* slice-type index into the constant tables */
2314 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2317 struct object_surface *obj_surface;
2318 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2319 VASurfaceID surface_id;
2320 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2322 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2323 assert(gpe_resource);
2325 i965_zero_gpe_resource(gpe_resource);  /* start from a clean surface each frame */
2327 data = i965_map_gpe_resource(gpe_resource);
2330 table_idx = slice_type_kernel[generic_state->frame_type];
2332 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2333 size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2334 memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2338 /* skip threshold table*/
2340 switch (generic_state->frame_type) {
2342 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2345 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2348 /*SLICE_TYPE_I,no change */
2352 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2353 for (i = 0; i < AVC_QP_MAX ; i++) {
2354 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);  /* app-supplied per-QP override, odd bytes */
2359 /*fill the qp for ref list*/
2360 size = 32 + 32 + 32 + 160;  /* L0 QPs + L1 QPs + padding + reserved; layout fixed by kernel */
2361 memset(data, 0xff, 32);  /* 0xff marks unused reference slots */
2362 memset(data + 32 + 32, 0xff, 32);
2363 switch (generic_state->frame_type) {
2364 case SLICE_TYPE_P: {
2365 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2366 surface_id = slice_param->RefPicList0[i].picture_id;
2367 obj_surface = SURFACE(surface_id);  /* lookup elided error handling follows */
2370 *(data + i) = avc_state->list_ref_idx[0][i];//?
2374 case SLICE_TYPE_B: {
2375 data = data + 32 + 32;  /* jump to the L1 slot block first */
2376 for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
2377 surface_id = slice_param->RefPicList1[i].picture_id;
2378 obj_surface = SURFACE(surface_id);
2381 *(data + i) = avc_state->list_ref_idx[1][i];//?
2384 data = data - 32 - 32;  /* back to the L0 slot block */
2386 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2387 surface_id = slice_param->RefPicList0[i].picture_id;
2388 obj_surface = SURFACE(surface_id);
2391 *(data + i) = avc_state->list_ref_idx[0][i];//?
2396 /*SLICE_TYPE_I,no change */
2401 /*mv cost and mode cost*/
2403 memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
2405 if (avc_state->old_mode_cost_enable) {
2407 for (i = 0; i < AVC_QP_MAX ; i++) {
2408 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];  /* patch legacy intra mode cost; data_tmp setup/advance elided */
2413 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2414 for (i = 0; i < AVC_QP_MAX ; i++) {
2415 *(data + (i * 32) + 24) =
2416 *(data + (i * 32) + 25) =
2417 *(data + (i * 32) + 27) =
2418 *(data + (i * 32) + 28) =
2419 *(data + (i * 32) + 29) =
2420 *(data + (i * 32) + 30) =
2421 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];  /* same FTQ threshold fanned into bytes 24-25,27-31 of each 32-byte row (byte 26 skipped) */
2429 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2434 if (avc_state->adaptive_intra_scaling_enable) {
2435 memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2437 memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
2440 if (IS_KBL(i965->intel.device_info) ||
2441 IS_GEN10(i965->intel.device_info) ||
2442 IS_GLK(i965->intel.device_info)) {
2446 memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));  /* Gen9.5+ only: TQ lambda table */
2450 memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));  /* Gen9.5+ only: FTQ25 table */
2453 i965_unmap_gpe_resource(gpe_resource);
/*
 * Legacy variant of gen9_avc_init_brc_const_data(): fills the same BRC
 * constant-data surface but from the older Gen7.5 QP-adjustment and
 * mode/MV cost tables, and skips the per-reference QP list section.
 * Selected when avc_state->multi_pre_enable is not set (see the caller
 * in gen9_avc_kernel_brc_frame_update).
 *
 * NOTE(review): elided listing — cursor advances (`data += size`), case
 * labels, and closing braces are missing from view.
 */
2457 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2458                                  struct encode_state *encode_state,
2459                                  struct intel_encoder_context *encoder_context)
2461 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2462 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2463 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2464 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2466 struct i965_gpe_resource *gpe_resource = NULL;
2467 unsigned int * data = NULL;  /* write cursor; NOTE(review): unsigned int* here vs unsigned char* in the new variant — pointer arithmetic scales by 4 */
2468 unsigned int * data_tmp = NULL;
2469 unsigned int size = 0;
2470 unsigned int table_idx = 0;
2471 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2472 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2475 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2476 assert(gpe_resource);
2478 i965_zero_gpe_resource(gpe_resource);
2480 data = i965_map_gpe_resource(gpe_resource);
2483 table_idx = slice_type_kernel[generic_state->frame_type];
2485 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2486 size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2487 memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2491 /* skip threshold table*/
2493 switch (generic_state->frame_type) {
2495 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2498 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2501 /*SLICE_TYPE_I,no change */
2505 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2506 for (i = 0; i < AVC_QP_MAX ; i++) {
2507 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2512 /*fill the qp for ref list*/
2518 /*mv cost and mode cost*/
2520 memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
2522 if (avc_state->old_mode_cost_enable) {
2524 for (i = 0; i < AVC_QP_MAX ; i++) {
2525 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];  /* patch legacy intra mode cost; data_tmp setup elided */
2530 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2531 for (i = 0; i < AVC_QP_MAX ; i++) {
2532 *(data + (i * 32) + 24) =
2533 *(data + (i * 32) + 25) =
2534 *(data + (i * 32) + 27) =
2535 *(data + (i * 32) + 28) =
2536 *(data + (i * 32) + 29) =
2537 *(data + (i * 32) + 30) =
2538 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];  /* fan one threshold into bytes 24-25,27-31 of each row */
2546 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2548 i965_unmap_gpe_resource(gpe_resource);
/*
 * Fill the CURBE for the BRC init/reset kernel: copies the constant
 * template, then programs rate-control targets (average/max bitrate,
 * VBV buffer size and initial fullness), frame geometry, GOP structure,
 * per-rate-mode BRC flags (CBR/VBR/AVBR), and the PAK deviation
 * thresholds derived from pow(k, bps_ratio).
 *
 * Bitrates are in bits (target_bit_rate is kbps, hence * 1000); buffer
 * fullness values are in bits.
 *
 * NOTE(review): elided listing — early-return on a NULL curbe map and
 * several closing braces are on lines not visible here.
 */
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2557 gen9_avc_brc_init_reset_curbe_data *cmd;
2558 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561 double input_bits_per_frame = 0;
2562 double bps_ratio = 0;
2563 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564 struct avc_param common_param;
2566 cmd = i965_gpe_context_map_curbe(gpe_context);
2571 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));  /* start from the constant CURBE template */
2573 memset(&common_param, 0, sizeof(common_param));
2574 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578 common_param.frames_per_100s = generic_state->frames_per_100s;
2579 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580 common_param.target_bit_rate = generic_state->target_bit_rate;
2582 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);  /* level-derived max frame size cap */
2583 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2585 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;  /* kbps -> bps */
2586 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2587 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;  /* P frames per GOP */
2588 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);  /* remaining non-I frames are B */
2589 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591 cmd->dw12.no_slices = avc_state->slice_num;
2594 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595 cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;  /* NOTE(review): self-assignment — likely meant to apply a VUI/HRD-derived value; confirm against full source */
2596 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;  /* CBR: average == max */
2602 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;  /* frame rate expressed as m/d = frames_per_100s / 100 */
2603 cmd->dw7.frame_rate_d = 100;
2604 cmd->dw8.brc_flag = 0;
2605 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;  /* 0x8000 disables MB-level BRC in the kernel */
2608 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2610 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2613 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2615 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;  /* sanitize: max must exceed average; default to 2x */
2618 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2620 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2622 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2626 //igonre icq/vcm/qvbr
2628 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2632 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;  /* bits per frame at max rate (stray ';;' in original) */
2634 if (cmd->dw2.buf_size_in_bits == 0) {
2635 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);  /* default VBV: 4 frames worth */
2638 if (cmd->dw1.init_buf_full_in_bits == 0) {
2639 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;  /* default initial fullness: 7/8 of buffer */
2641 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);  /* never start with < 2 frames of headroom */
2644 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;  /* clamp to buffer size */
2649 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;  /* AVBR overrides: 2 s of target rate */
2651 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2655 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;  /* clamp to [0.1, 3.5] */
2659 /* PAK deviation thresholds; negative products are intentionally stored in unsigned fields (two's complement as the kernel expects) */
2659 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2664 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2666 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2667 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2680 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2681 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2684 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2686 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the two surfaces the BRC init/reset kernel reads and writes:
 * the BRC history buffer (raw buffer) and the ME distortion surface
 * (2D R8_UNORM), at their fixed binding-table indices.
 *
 * NOTE(review): elided listing — some i965_add_buffer_*_gpe_surface
 * arguments (gpe_context, flags) sit on lines not visible here.
 */
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693                                      struct encode_state *encode_state,
2694                                      struct i965_gpe_context *gpe_context,
2695                                      struct intel_encoder_context *encoder_context,
2698 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2701 i965_add_buffer_gpe_surface(ctx,
2703                             &avc_ctx->res_brc_history_buffer,
2705                             avc_ctx->res_brc_history_buffer.size,
2707                             GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2709 i965_add_buffer_2d_gpe_surface(ctx,
2711                                &avc_ctx->res_brc_dist_data_surface,
2713                                I965_SURFACEFORMAT_R8_UNORM,
2714                                GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/*
 * Dispatch the BRC init or reset kernel as a single MEDIA_OBJECT.
 * INIT is used on the first frame; once generic_state->brc_inited is
 * set, subsequent calls use the RESET kernel instead. CURBE and surface
 * setup are delegated through the generic_ctx function pointers so
 * per-generation variants can override them.
 *
 * Returns VA_STATUS_SUCCESS (no failure paths visible in this listing).
 */
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721                                struct encode_state *encode_state,
2722                                struct intel_encoder_context *encoder_context)
2724 struct i965_driver_data *i965 = i965_driver_data(ctx);
2725 struct i965_gpe_table *gpe = &i965->gpe_table;
2726 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2731 struct i965_gpe_context *gpe_context;
2732 struct gpe_media_object_parameter media_object_param;
2733 struct gpe_media_object_inline_data media_object_inline_data;
2734 int media_function = 0;
2735 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2737 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2739 if (generic_state->brc_inited)
2740 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;  /* after first init, only reset on later reconfigs */
2742 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2744 gpe->context_init(ctx, gpe_context);
2745 gpe->reset_binding_table(ctx, gpe_context);
2747 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2749 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2751 gpe->setup_interface_data(ctx, gpe_context);
2753 memset(&media_object_param, 0, sizeof(media_object_param));
2754 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755 media_object_param.pinline_data = &media_object_inline_data;  /* zeroed inline data: kernel needs no per-object payload */
2756 media_object_param.inline_size = sizeof(media_object_inline_data);
2758 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2761                                  &media_object_param);
2763 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness, frame type and flags, per-slice-type min/max QP clamps,
 * AVBR gain-adjust thresholds, ROI enable, and the level-derived max
 * frame size. Also advances
 * generic_state->brc_init_current_target_buf_full_in_bits by one frame's
 * input bits (plus skipped frames) as a side effect.
 *
 * NOTE(review): elided listing — the early return when
 * reconstructed_object is missing and several closing braces/case labels
 * are on lines not visible here.
 */
2767 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2768                                     struct encode_state *encode_state,
2769                                     struct i965_gpe_context *gpe_context,
2770                                     struct intel_encoder_context *encoder_context,
2773 gen9_avc_frame_brc_update_curbe_data *cmd;
2774 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2775 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2777 struct object_surface *obj_surface;
2778 struct gen9_surface_avc *avc_priv_surface;
2779 struct avc_param common_param;
2780 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2782 obj_surface = encode_state->reconstructed_object;
2784 if (!obj_surface || !obj_surface->private_data)
2786 avc_priv_surface = obj_surface->private_data;
2788 cmd = i965_gpe_context_map_curbe(gpe_context);
2793 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));  /* start from the constant CURBE template */
2795 cmd->dw5.target_size_flag = 0 ;
2796 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2798 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;  /* wrap the accumulator and tell the kernel it wrapped */
2799 cmd->dw5.target_size_flag = 1 ;
2802 if (generic_state->skip_frame_enbale) {
2803 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2804 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2806 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;  /* skipped frames still consume budget */
2809 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2810 cmd->dw1.frame_number = generic_state->seq_frame_number ;
2811 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;  /* bytes -> bits */
2812 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2813 cmd->dw5.brc_flag = 0 ;
2814 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2816 if (avc_state->multi_pre_enable) {
2817 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2818 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2821 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
2822 if (avc_state->min_max_qp_enable) {
2823 switch (generic_state->frame_type) {
2825 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2826 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2829 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2830 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2833 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2834 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2838 cmd->dw6.minimum_qp = 0 ;  /* else branch: 0/0 disables QP clamping in the kernel */
2839 cmd->dw6.maximum_qp = 0 ;
2841 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2842 cmd->dw6.enable_sliding_window = 0 ;
2844 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;  /* consume one frame of budget */
2846 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2847 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);  /* AVBR gain-adjust schedule scaled by convergence/150 */
2848 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2849 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2850 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2851 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));  /* rate-ratio bands widened/narrowed by accuracy/30 */
2852 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2853 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2854 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2855 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2856 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2859 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2861 memset(&common_param, 0, sizeof(common_param));
2862 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2863 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2864 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2865 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2866 common_param.frames_per_100s = generic_state->frames_per_100s;
2867 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2868 common_param.target_bit_rate = generic_state->target_bit_rate;
2870 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);  /* level-derived frame size cap */
2871 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind every surface the frame-level BRC update kernel touches:
 * history buffer, previous-PAK statistics, image-state read/write
 * buffers, the MBENC CURBE (either a dedicated BRC buffer on Gen9.5 or
 * the MBENC context's dynamic-state bo), the ME distortion surface, the
 * BRC constant-data surface, and (non-Gen8) MB status / MB QP surfaces.
 *
 * is_g95 selects between the Gen9 and Gen9.5 (KBL/GLK/Gen10) binding
 * table index sets; the assignments to is_g95 themselves sit on elided
 * lines after each device check.
 *
 * NOTE(review): elided listing — many i965_add_buffer_*_gpe_surface
 * arguments (gpe_context, cacheability/write flags, offsets) are on
 * lines not visible here.
 */
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878                                        struct encode_state *encode_state,
2879                                        struct i965_gpe_context *gpe_context,
2880                                        struct intel_encoder_context *encoder_context,
2883 struct i965_driver_data *i965 = i965_driver_data(ctx);
2884 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886 struct brc_param * param = (struct brc_param *)param_brc ;
2887 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;  /* MBENC context whose CURBE this kernel reads/rewrites */
2888 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889 unsigned char is_g95 = 0;
2891 if (IS_SKL(i965->intel.device_info) ||
2892 IS_BXT(i965->intel.device_info) ||
2893 IS_GEN8(i965->intel.device_info))
2895 else if (IS_KBL(i965->intel.device_info) ||
2896 IS_GEN10(i965->intel.device_info) ||
2897 IS_GLK(i965->intel.device_info))
2900 /* brc history buffer*/
2901 i965_add_buffer_gpe_surface(ctx,
2903                             &avc_ctx->res_brc_history_buffer,
2905                             avc_ctx->res_brc_history_buffer.size,
2907                             (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2909 /* previous pak buffer*/
2910 i965_add_buffer_gpe_surface(ctx,
2912                             &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2914                             avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2916                             (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2918 /* image state command buffer read only*/
2919 i965_add_buffer_gpe_surface(ctx,
2921                             &avc_ctx->res_brc_image_state_read_buffer,
2923                             avc_ctx->res_brc_image_state_read_buffer.size,
2925                             (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2927 /* image state command buffer write only*/
2928 i965_add_buffer_gpe_surface(ctx,
2930                             &avc_ctx->res_brc_image_state_write_buffer,
2932                             avc_ctx->res_brc_image_state_write_buffer.size,
2934                             (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2936 if (avc_state->mbenc_brc_buffer_size > 0) {
2937 i965_add_buffer_gpe_surface(ctx,  /* Gen9.5 path: dedicated MBENC-BRC buffer instead of the raw CURBE bo */
2939                             &(avc_ctx->res_mbenc_brc_buffer),
2941                             avc_ctx->res_mbenc_brc_buffer.size,
2943                             GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2945 /* Mbenc curbe input buffer */
2946 gen9_add_dri_buffer_gpe_surface(ctx,  /* else branch (Gen9): expose the MBENC CURBE region of its dynamic-state bo */
2948                                 gpe_context_mbenc->dynamic_state.bo,
2950                                 ALIGN(gpe_context_mbenc->curbe.length, 64),
2951                                 gpe_context_mbenc->curbe.offset,
2952                                 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953 /* Mbenc curbe output buffer */
2954 gen9_add_dri_buffer_gpe_surface(ctx,  /* same region bound writable so BRC can patch MBENC's CURBE */
2956                                 gpe_context_mbenc->dynamic_state.bo,
2958                                 ALIGN(gpe_context_mbenc->curbe.length, 64),
2959                                 gpe_context_mbenc->curbe.offset,
2960                                 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2963 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964 i965_add_buffer_2d_gpe_surface(ctx,
2966                                &avc_ctx->res_brc_dist_data_surface,
2968                                I965_SURFACEFORMAT_R8_UNORM,
2969                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2971 /* BRC const data 2D surface buffer */
2972 i965_add_buffer_2d_gpe_surface(ctx,
2974                                &avc_ctx->res_brc_const_data_buffer,
2976                                I965_SURFACEFORMAT_R8_UNORM,
2977                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2979 /* MB statistical data surface*/
2980 if (!IS_GEN8(i965->intel.device_info)) {
2981 i965_add_buffer_gpe_surface(ctx,
2983                             &avc_ctx->res_mb_status_buffer,
2985                             avc_ctx->res_mb_status_buffer.size,
2987                             (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2989 i965_add_buffer_2d_gpe_surface(ctx,  /* else branch (Gen8): MB QP surface instead of MB status */
2991                                &avc_ctx->res_mbbrc_mb_qp_data_surface,
2993                                I965_SURFACEFORMAT_R8_UNORM,
2994                                GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);  /* NOTE(review): QP surface bound at ..._MB_STATUS_INDEX — verify against the kernel's binding table (a *_MB_QP_INDEX would be expected) */
/*
 * Run the frame-level BRC update kernel. Sequence:
 *   1. pick the MBENC kernel variant for the current kernel mode and
 *      frame type, init its GPE context, and pre-fill its CURBE (on
 *      Gen9 the BRC kernel patches that CURBE in place; Gen9.5 sets
 *      MBENC CURBE outside BRC — see decouple_mbenc_curbe_from_brc);
 *   2. set the BRC frame-update CURBE;
 *   3. (re)fill the BRC constant-data surface (new vs. legacy layout
 *      based on multi_pre_enable) and the image-state read buffer
 *      (gen8 vs. gen9 command layout);
 *   4. bind surfaces and dispatch a single MEDIA_OBJECT.
 *
 * Returns VA_STATUS_SUCCESS (no failure paths visible in this listing).
 *
 * NOTE(review): elided listing — kernel_idx declaration, the P/B
 * kernel-offset additions after the frame-type checks, and several
 * closing braces are on lines not visible here.
 */
3000 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
3001                                  struct encode_state *encode_state,
3002                                  struct intel_encoder_context *encoder_context)
3005 struct i965_driver_data *i965 = i965_driver_data(ctx);
3006 struct i965_gpe_table *gpe = &i965->gpe_table;
3007 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3008 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3009 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3010 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3011 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3013 struct i965_gpe_context *gpe_context = NULL;
3014 struct gpe_media_object_parameter media_object_param;
3015 struct gpe_media_object_inline_data media_object_inline_data;
3016 int media_function = 0;
3018 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
3019 unsigned int brc_enabled = 0;
3020 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
3021 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));  /* '&& (0)': dirty-ROI deliberately disabled */
3023 /* the following set the mbenc curbe*/
3024 struct mbenc_param curbe_mbenc_param ;
3025 struct brc_param curbe_brc_param ;
3027 mb_const_data_buffer_in_use =
3028 generic_state->mb_brc_enabled ||
3031 avc_state->mb_qp_data_enable ||
3032 avc_state->rolling_intra_refresh_enable;
3033 mb_qp_buffer_in_use =
3034 generic_state->mb_brc_enabled ||
3035 generic_state->brc_roi_enable ||
3036 avc_state->mb_qp_data_enable;
3038 switch (generic_state->kernel_mode) {
3039 case INTEL_ENC_KERNEL_NORMAL : {
3040 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;  /* I-frame variant; P/B offsets added below */
3043 case INTEL_ENC_KERNEL_PERFORMANCE : {
3044 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3047 case INTEL_ENC_KERNEL_QUALITY : {
3048 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
3056 if (generic_state->frame_type == SLICE_TYPE_P) {
3058 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3062 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
3063 gpe->context_init(ctx, gpe_context);
3065 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
3067 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
3068 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
3069 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
3070 curbe_mbenc_param.brc_enabled = brc_enabled;
3071 curbe_mbenc_param.roi_enabled = roi_enable;
3073 /* set curbe mbenc*/
3074 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
3076 // gen95 set curbe out of the brc. gen9 do it here
3077 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
3078 /*begin brc frame update*/
3079 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
3080 curbe_brc_param.gpe_context_mbenc = gpe_context;  /* hand the MBENC context to the surface-binding step */
3081 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
3082 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;  /* gpe_context now switches to the BRC kernel */
3083 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3084 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
3086 gpe->context_init(ctx, gpe_context);
3087 gpe->reset_binding_table(ctx, gpe_context);
3088 /*brc copy ignored*/
3090 /* set curbe frame update*/
3091 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3093 /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
3094 if (avc_state->multi_pre_enable) {
3095 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
3097 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
3099 /* image state construct*/
3100 if (IS_GEN8(i965->intel.device_info)) {
3101 gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3103 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3105 /* set surface frame mbenc*/
3106 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3109 gpe->setup_interface_data(ctx, gpe_context);
3111 memset(&media_object_param, 0, sizeof(media_object_param));
3112 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
3113 media_object_param.pinline_data = &media_object_inline_data;  /* zeroed inline data: kernel needs no per-object payload */
3114 media_object_param.inline_size = sizeof(media_object_inline_data);
3116 gen9_avc_run_kernel_media_object(ctx, encoder_context,
3119                                  &media_object_param);
3121 return VA_STATUS_SUCCESS;
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126 struct encode_state *encode_state,
3127 struct i965_gpe_context *gpe_context,
3128 struct intel_encoder_context *encoder_context,
3131 gen9_avc_mb_brc_curbe_data *cmd;
3132 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3135 cmd = i965_gpe_context_map_curbe(gpe_context);
3140 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3142 cmd->dw0.cur_frame_type = generic_state->frame_type;
3143 if (generic_state->brc_roi_enable) {
3144 cmd->dw0.enable_roi = 1;
3146 cmd->dw0.enable_roi = 0;
3149 i965_gpe_context_unmap_curbe(gpe_context);
3155 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3156 struct encode_state *encode_state,
3157 struct i965_gpe_context *gpe_context,
3158 struct intel_encoder_context *encoder_context,
3161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3162 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3163 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3165 /* brc history buffer*/
3166 i965_add_buffer_gpe_surface(ctx,
3168 &avc_ctx->res_brc_history_buffer,
3170 avc_ctx->res_brc_history_buffer.size,
3172 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3174 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
3175 if (generic_state->mb_brc_enabled) {
3176 i965_add_buffer_2d_gpe_surface(ctx,
3178 &avc_ctx->res_mbbrc_mb_qp_data_surface,
3180 I965_SURFACEFORMAT_R8_UNORM,
3181 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3185 /* BRC roi feature*/
3186 if (generic_state->brc_roi_enable) {
3187 i965_add_buffer_gpe_surface(ctx,
3189 &avc_ctx->res_mbbrc_roi_surface,
3191 avc_ctx->res_mbbrc_roi_surface.size,
3193 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3197 /* MB statistical data surface*/
3198 i965_add_buffer_gpe_surface(ctx,
3200 &avc_ctx->res_mb_status_buffer,
3202 avc_ctx->res_mb_status_buffer.size,
3204 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
3210 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3211 struct encode_state *encode_state,
3212 struct intel_encoder_context *encoder_context)
3215 struct i965_driver_data *i965 = i965_driver_data(ctx);
3216 struct i965_gpe_table *gpe = &i965->gpe_table;
3217 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3218 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3219 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3220 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3222 struct i965_gpe_context *gpe_context;
3223 struct gpe_media_object_walker_parameter media_object_walker_param;
3224 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3225 int media_function = 0;
3228 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3229 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3230 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3232 gpe->context_init(ctx, gpe_context);
3233 gpe->reset_binding_table(ctx, gpe_context);
3235 /* set curbe brc mb update*/
3236 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3239 /* set surface brc mb update*/
3240 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3243 gpe->setup_interface_data(ctx, gpe_context);
3245 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3246 /* the scaling is based on 8x8 blk level */
3247 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3248 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3249 kernel_walker_param.no_dependency = 1;
3251 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3253 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3256 &media_object_walker_param);
3258 return VA_STATUS_SUCCESS;
mbenc kernel related functions, including the intra dist kernel
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3267 int biweight = 32; // default value
3269 /* based on kernel HLD*/
3270 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3273 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3275 if (biweight != 16 && biweight != 21 &&
3276 biweight != 32 && biweight != 43 && biweight != 48) {
3277 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286 struct encode_state *encode_state,
3287 struct intel_encoder_context *encoder_context)
3289 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3294 int max_num_references;
3295 VAPictureH264 *curr_pic;
3296 VAPictureH264 *ref_pic_l0;
3297 VAPictureH264 *ref_pic_l1;
3306 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3308 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309 curr_pic = &pic_param->CurrPic;
3310 for (i = 0; i < max_num_references; i++) {
3311 ref_pic_l0 = &(slice_param->RefPicList0[i]);
3313 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3316 ref_pic_l1 = &(slice_param->RefPicList1[0]);
3317 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3318 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3321 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323 CLIP(poc0, -128, 127);
3324 CLIP(poc1, -128, 127);
3331 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332 tx = (16384 + tmp) / td ;
3333 tmp = (tb * tx + 32) >> 6;
3334 CLIP(tmp, -1024, 1023);
3335 avc_state->dist_scale_factor_list0[i] = tmp;
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342 VAEncSliceParameterBufferH264 *slice_param,
3346 struct i965_driver_data *i965 = i965_driver_data(ctx);
3347 struct object_surface *obj_surface;
3348 struct gen9_surface_avc *avc_priv_surface;
3349 VASurfaceID surface_id;
3351 assert(slice_param);
3355 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3360 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3365 obj_surface = SURFACE(surface_id);
3366 if (obj_surface && obj_surface->private_data) {
3367 avc_priv_surface = obj_surface->private_data;
3368 return avc_priv_surface->qp_value;
3375 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
3376 struct encode_state *encode_state,
3377 struct intel_encoder_context *encoder_context)
3379 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3380 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3381 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3382 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3384 struct i965_gpe_resource *gpe_resource = NULL;
3385 unsigned int * data = NULL;
3386 unsigned int * data_tmp = NULL;
3387 unsigned int size = 16 * 52;
3388 unsigned int table_idx = 0;
3389 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
3390 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
3393 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3394 assert(gpe_resource);
3395 data = i965_map_gpe_resource(gpe_resource);
3398 table_idx = slice_type_kernel[generic_state->frame_type];
3400 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3404 switch (generic_state->frame_type) {
3406 for (i = 0; i < AVC_QP_MAX ; i++) {
3407 if (avc_state->old_mode_cost_enable)
3408 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
3414 for (i = 0; i < AVC_QP_MAX ; i++) {
3415 if (generic_state->frame_type == SLICE_TYPE_P) {
3416 if (avc_state->skip_bias_adjustment_enable)
3417 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
3419 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3420 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3421 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3422 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3424 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
3427 if (avc_state->adaptive_intra_scaling_enable) {
3428 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3430 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
3442 for (i = 0; i < AVC_QP_MAX ; i++) {
3443 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3444 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3445 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3446 (avc_state->ftq_skip_threshold_lut[i] << 24));
3447 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3448 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3449 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3450 (avc_state->ftq_skip_threshold_lut[i] << 24));
3453 if (avc_state->kernel_trellis_enable) {
3454 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3455 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3461 i965_unmap_gpe_resource(gpe_resource);
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466 struct encode_state *encode_state,
3467 struct i965_gpe_context *gpe_context,
3468 struct intel_encoder_context *encoder_context,
3471 struct i965_driver_data *i965 = i965_driver_data(ctx);
3473 gen9_avc_mbenc_curbe_data *g9;
3474 gen95_avc_mbenc_curbe_data *g95;
3476 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3480 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3482 VASurfaceID surface_id;
3483 struct object_surface *obj_surface;
3485 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486 unsigned char qp = 0;
3487 unsigned char me_method = 0;
3488 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489 unsigned int table_idx = 0;
3490 unsigned char is_g9 = 0;
3491 unsigned char is_g95 = 0;
3492 unsigned int curbe_size = 0;
3494 unsigned int preset = generic_state->preset;
3495 if (IS_SKL(i965->intel.device_info) ||
3496 IS_BXT(i965->intel.device_info)) {
3497 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3501 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502 memset(cmd.g9, 0, curbe_size);
3504 if (mbenc_i_frame_dist_in_use) {
3505 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3508 switch (generic_state->frame_type) {
3510 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3513 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3516 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3523 } else if (IS_KBL(i965->intel.device_info) ||
3524 IS_GEN10(i965->intel.device_info) ||
3525 IS_GLK(i965->intel.device_info)) {
3526 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3530 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531 memset(cmd.g9, 0, curbe_size);
3533 if (mbenc_i_frame_dist_in_use) {
3534 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3537 switch (generic_state->frame_type) {
3539 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3542 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3545 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3553 /* Never get here, just silence a gcc warning */
3559 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3562 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3567 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568 cmd.g9->dw38.max_len_sp = 0;
3571 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3573 cmd.g9->dw3.src_access = 0;
3574 cmd.g9->dw3.ref_access = 0;
3576 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577 //disable ftq_override by now.
3578 if (avc_state->ftq_override) {
3579 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3582 // both gen9 and gen95 come here by now
3583 if (generic_state->frame_type == SLICE_TYPE_P) {
3584 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3587 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3591 cmd.g9->dw3.ftq_enable = 0;
3594 if (avc_state->disable_sub_mb_partion)
3595 cmd.g9->dw3.sub_mb_part_mask = 0x7;
3597 if (mbenc_i_frame_dist_in_use) {
3598 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601 cmd.g9->dw6.batch_buffer_end = 0;
3602 cmd.g9->dw31.intra_compute_type = 1;
3605 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3610 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612 } else if (avc_state->skip_bias_adjustment_enable) {
3613 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614 // No need to check for P picture as the flag is only enabled for P picture */
3615 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3620 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3623 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626 cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3633 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634 cmd.g9->dw7.src_field_polarity = 0;//field related
3636 /*ftq_skip_threshold_lut set,dw14 /15*/
3638 /*r5 disable NonFTQSkipThresholdLUT*/
3639 if (generic_state->frame_type == SLICE_TYPE_P) {
3640 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3642 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3647 cmd.g9->dw13.qp_prime_y = qp;
3648 cmd.g9->dw13.qp_prime_cb = qp;
3649 cmd.g9->dw13.qp_prime_cr = qp;
3650 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3652 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653 switch (gen9_avc_multi_pred[preset]) {
3655 cmd.g9->dw32.mult_pred_l0_disable = 128;
3656 cmd.g9->dw32.mult_pred_l1_disable = 128;
3659 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660 cmd.g9->dw32.mult_pred_l1_disable = 128;
3663 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3667 cmd.g9->dw32.mult_pred_l0_disable = 1;
3668 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3674 cmd.g9->dw32.mult_pred_l0_disable = 128;
3675 cmd.g9->dw32.mult_pred_l1_disable = 128;
3678 /*field setting for dw33 34, ignored*/
3680 if (avc_state->adaptive_transform_decision_enable) {
3681 if (generic_state->frame_type != SLICE_TYPE_I) {
3683 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684 cmd.g9->dw58.mb_texture_threshold = 1024;
3685 cmd.g9->dw58.tx_decision_threshold = 128;
3686 } else if (is_g95) {
3687 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688 cmd.g95->dw60.mb_texture_threshold = 1024;
3689 cmd.g95->dw60.tx_decision_threshold = 128;
3695 if (generic_state->frame_type == SLICE_TYPE_B) {
3696 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3701 cmd.g9->dw34.b_original_bff = 0; //frame only
3702 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3708 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3713 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3715 if (cmd.g9->dw34.force_non_skip_check) {
3716 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3721 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722 cmd.g9->dw38.ref_threshold = 400;
3723 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3725 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3726 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3730 if (mbenc_i_frame_dist_in_use) {
3731 cmd.g9->dw13.qp_prime_y = 0;
3732 cmd.g9->dw13.qp_prime_cb = 0;
3733 cmd.g9->dw13.qp_prime_cr = 0;
3734 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3739 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3752 table_idx = slice_type_kernel[generic_state->frame_type];
3753 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3755 if (generic_state->frame_type == SLICE_TYPE_I) {
3756 cmd.g9->dw0.skip_mode_enable = 0;
3757 cmd.g9->dw37.skip_mode_enable = 0;
3758 cmd.g9->dw36.hme_combine_overlap = 0;
3759 cmd.g9->dw47.intra_cost_sf = 16;
3760 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3762 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3764 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766 cmd.g9->dw3.bme_disable_fbr = 1;
3767 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769 cmd.g9->dw7.non_skip_zmv_added = 1;
3770 cmd.g9->dw7.non_skip_mode_added = 1;
3771 cmd.g9->dw7.skip_center_mask = 1;
3772 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774 cmd.g9->dw36.hme_combine_overlap = 1;
3775 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3784 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786 cmd.g9->dw3.search_ctrl = 7;
3787 cmd.g9->dw3.skip_type = 1;
3788 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790 cmd.g9->dw7.skip_center_mask = 0xff;
3791 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793 cmd.g9->dw36.hme_combine_overlap = 1;
3794 surface_id = slice_param->RefPicList1[0].picture_id;
3795 obj_surface = SURFACE(surface_id);
3797 WARN_ONCE("Invalid backward reference frame\n");
3800 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3802 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3815 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817 cmd.g9->dw7.non_skip_zmv_added = 1;
3818 cmd.g9->dw7.non_skip_mode_added = 1;
3821 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3827 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3829 if (avc_state->rolling_intra_refresh_enable) {
3830 /*by now disable it*/
3831 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832 cmd.g9->dw32.mult_pred_l0_disable = 128;
3833 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834 across one P frame to another P frame, as needed by the RollingI algo */
3836 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3842 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843 cmd.g95->dw4.enable_intra_refresh = 0;
3844 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3848 cmd.g95->dw4.enable_intra_refresh = 1;
3849 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3858 cmd.g9->dw34.widi_intra_refresh_en = 0;
3861 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3863 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3865 cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3867 /*roi set disable by now. 49-56*/
3868 if (curbe_param->roi_enabled) {
3869 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3870 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3871 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3872 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3874 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3875 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3876 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3877 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3879 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3880 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3881 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3882 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3884 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3885 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3886 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3887 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3889 if (!generic_state->brc_enabled) {
3891 tmp = generic_state->roi[0].value;
3892 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894 tmp = generic_state->roi[1].value;
3895 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897 tmp = generic_state->roi[2].value;
3898 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900 tmp = generic_state->roi[3].value;
3901 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3904 cmd.g9->dw34.roi_enable_flag = 0;
3909 if (avc_state->tq_enable) {
3910 if (generic_state->frame_type == SLICE_TYPE_I) {
3911 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3914 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3919 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3923 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3926 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3929 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3932 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3938 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3963 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3987 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the MBENC kernel CURBE (constant/argument buffer) for the FEI
 * (Flexible Encoding Infrastructure) H.264 encode path.
 *
 * The curbe is first seeded from a per-frame-type init table, then the
 * motion-search, mode-cost and FEI-specific fields are overridden from the
 * application-supplied VAEncMiscParameterFEIFrameControlH264 and the
 * driver's per-frame state.  The final DWords program the kernel's surface
 * binding-table indices.
 *
 * NOTE(review): this extract is elided (the leading numbers on each line
 * are residual file line numbers and they jump) -- several braces, case
 * labels and else-branches are not visible here.
 */
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994 struct encode_state *encode_state,
3995 struct i965_gpe_context *gpe_context,
3996 struct intel_encoder_context *encoder_context,
3999 struct i965_driver_data *i965 = i965_driver_data(ctx);
4000 gen9_avc_fei_mbenc_curbe_data *cmd;
4001 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004 VASurfaceID surface_id;
4005 struct object_surface *obj_surface;
4006 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
4008 VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4010 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011 unsigned char qp = 0;
4012 unsigned char me_method = 0;
4013 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014 unsigned int table_idx = 0;
4015 int ref_width, ref_height, len_sp;
4016 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018 unsigned int preset = generic_state->preset;
/* Effective slice QP = picture init QP + per-slice delta. */
4020 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4022 assert(gpe_context != NULL);
/* Map the curbe into CPU address space; unmapped at the end of the function. */
4023 cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024 memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* Seed the curbe from a constant init table chosen by frame type (or by the
 * special I-frame-distortion mode). */
4026 if (mbenc_i_frame_dist_in_use) {
4027 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4030 switch (generic_state->frame_type) {
4032 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4035 memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4038 memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* Pick the ME method from the predefined search-window id:
 * windows 5 and 8 use full search, everything else diamond. */
4045 /* 4 means full search, 6 means diamond search */
4046 me_method = (fei_param->search_window == 5) ||
4047 (fei_param->search_window == 8) ? 4 : 6;
4049 ref_width = fei_param->ref_width;
4050 ref_height = fei_param->ref_height;
4051 len_sp = fei_param->len_sp;
/* Validate/override the search parameters per the requested search window
 * preset (the per-case assignments are elided from this view). */
4052 /* If there is a search_window, discard user provided ref_width, ref_height
4053 * and search_path length */
4054 switch (fei_param->search_window) {
4056 /* not use predefined search window, there should be a search_path input */
4057 if ((fei_param->search_path != 0) &&
4058 (fei_param->search_path != 1) &&
4059 (fei_param->search_path != 2)) {
4060 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
4063 /* 4 means full search, 6 means diamond search */
4064 me_method = (fei_param->search_path == 1) ? 6 : 4;
4065 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066 WARN_ONCE("Invalid input ref_width/ref_height in"
4067 "SearchWindow=0 case! \n");
4073 /* Tiny - 4 SUs 24x24 window */
4080 /* Small - 9 SUs 28x28 window */
4086 /* Diamond - 16 SUs 48x40 window */
4092 /* Large Diamond - 32 SUs 48x40 window */
4098 /* Exhaustive - 48 SUs 48x40 window */
4104 /* Diamond - 16 SUs 64x32 window */
4110 /* Large Diamond - 32 SUs 64x32 window */
4116 /* Exhaustive - 48 SUs 64x32 window */
/* Clamp the reference window to hardware limits; B frames (bidirectional)
 * get the tighter 32x32 clamp. */
4126 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4128 CLIP(ref_width, 4, 32);
4129 CLIP(ref_height, 4, 32);
4130 } else if (is_pframe) {
4131 CLIP(ref_width, 4, 64);
4132 CLIP(ref_height, 4, 32);
/* Generic motion-search fields shared by all frame types. */
4135 cmd->dw0.adaptive_enable =
4136 cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137 cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138 = avc_state->transform_8x8_mode_enable;
4139 cmd->dw2.max_len_sp = len_sp;
4140 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142 cmd->dw3.src_access =
4143 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is supported
/* Fast-transform-quantization (FTQ) skip: explicit override wins, otherwise
 * taken from the per-preset table; never enabled on I frames. */
4145 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146 if (avc_state->ftq_override) {
4147 cmd->dw3.ft_enable = avc_state->ftq_enable;
4149 if (generic_state->frame_type == SLICE_TYPE_P) {
4150 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4152 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4156 cmd->dw3.ft_enable = 0;
4159 if (avc_state->disable_sub_mb_partion)
4160 cmd->dw3.sub_mb_part_mask = 0x7;
/* Picture-geometry and per-frame control fields: the I-frame-distortion mode
 * works on the 4x-downscaled picture, the normal path on full resolution. */
4162 if (mbenc_i_frame_dist_in_use) {
4163 /* Fixme: Not supported, no brc in fei */
4165 cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166 cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168 cmd->dw6.batch_buffer_end = 0;
4169 cmd->dw31.intra_compute_type = 1;
4172 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173 cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175 generic_state->frame_height_in_mbs : avc_state->slice_height;
4176 cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177 cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178 cmd->dw3.inter_sad = fei_param->inter_sad;
4179 cmd->dw3.Intra_sad = fei_param->intra_sad;
4180 cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181 cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182 cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183 avc_state->sfd_enable && generic_state->hme_enabled;
4184 cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185 cmd->dw4.constrained_intra_pred_flag =
4186 pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187 cmd->dw4.hme_enable = 0;
4188 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189 cmd->dw4.use_actual_ref_qp_value =
4190 generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191 cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192 cmd->dw7.src_field_polarity = 0;
/* Mode/MV cost tables indexed by kernel slice type and QP. */
4195 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197 // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198 } else if (avc_state->skip_bias_adjustment_enable) {
4199 // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200 // No need to check for P picture as the flag is only enabled for P picture
4201 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* Search path tables: table 1 for B frames, table 0 otherwise. */
4205 /* search path tables */
4206 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4209 //ftq_skip_threshold_lut set,dw14 /15
4211 //r5 disable NonFTQSkipThresholdLUT
/* Per-QP skip thresholds for inter frames. */
4212 if (generic_state->frame_type == SLICE_TYPE_P) {
4213 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215 cmd->dw32.skip_val =
4216 gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4218 cmd->dw13.qp_prime_y = qp;
4219 cmd->dw13.qp_prime_cb = qp;
4220 cmd->dw13.qp_prime_cr = qp;
4221 cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
/* Multi-reference prediction: 0x01 enables, 0x80 disables a list. */
4223 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224 cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225 cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4228 cmd->dw32.mult_pred_l0_disable = 0x80;
4229 cmd->dw32.mult_pred_l1_disable = 0x80;
4231 /* no field pic setting, not supported */
/* Adaptive 8x8/4x4 transform decision (inter frames only). */
4234 if (avc_state->adaptive_transform_decision_enable) {
4235 if (generic_state->frame_type != SLICE_TYPE_I) {
4236 cmd->dw34.enable_adaptive_tx_decision = 1;
4239 cmd->dw58.mb_texture_threshold = 1024;
4240 cmd->dw58.tx_decision_threshold = 128;
4242 if (generic_state->frame_type == SLICE_TYPE_B) {
4243 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244 cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4247 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248 cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4254 if (cmd->dw34.force_non_skip_check) {
4255 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4257 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258 cmd->dw38.ref_threshold = 400;
4259 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260 // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
4261 // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262 // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
/* I-frame-distortion mode neutralizes QP and intra penalties. */
4264 if (mbenc_i_frame_dist_in_use) {
4265 cmd->dw13.qp_prime_y = 0;
4266 cmd->dw13.qp_prime_cb = 0;
4267 cmd->dw13.qp_prime_cr = 0;
4268 cmd->dw33.intra_16x16_nondc_penalty = 0;
4269 cmd->dw33.intra_8x8_nondc_penalty = 0;
4270 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Per-reference actual QP values (list0 refs 0..7 and list1 refs 0..1). */
4272 if (cmd->dw4.use_actual_ref_qp_value) {
4273 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4285 table_idx = slice_type_kernel[generic_state->frame_type];
4286 cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* Frame-type-specific overrides: I, then P, then B. */
4288 if (generic_state->frame_type == SLICE_TYPE_I) {
4289 cmd->dw0.skip_mode_enable = 0;
4290 cmd->dw37.skip_mode_enable = 0;
4291 cmd->dw36.hme_combine_overlap = 0;
4292 cmd->dw36.check_all_fractional_enable = 0;
4293 cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294 cmd->dw34.enable_direct_bias_adjustment = 0;
4295 cmd->dw34.enable_global_motion_bias_adjustment = 0;
4297 } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299 cmd->dw3.bme_disable_fbr = 1;
4300 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302 cmd->dw7.non_skip_zmv_added = 1;
4303 cmd->dw7.non_skip_mode_added = 1;
4304 cmd->dw7.skip_center_mask = 1;
/* NOTE(review): the P path indexes the intra scaling tables by preset,
 * while the B path below indexes them by qp -- looks intentional but
 * worth confirming against the reference kernel interface. */
4306 cmd->dw47.intra_cost_sf =
4307 (avc_state->adaptive_intra_scaling_enable) ?
4308 gen9_avc_adaptive_intra_scaling_factor[preset] :
4309 gen9_avc_intra_scaling_factor[preset];
4311 cmd->dw47.max_vmv_r =
4312 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4314 cmd->dw36.hme_combine_overlap = 1;
4315 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317 cmd->dw34.enable_direct_bias_adjustment = 0;
4318 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319 if (avc_state->global_motion_bias_adjustment_enable)
4320 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4322 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4324 } else { /* B slice */
4326 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327 cmd->dw1.bi_Weight = avc_state->bi_weight;
4328 cmd->dw3.search_ctrl = 7;
4329 cmd->dw3.skip_type = 1;
4330 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332 cmd->dw7.skip_center_mask = 0xff;
4334 cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335 gen9_avc_adaptive_intra_scaling_factor[qp] :
4336 gen9_avc_intra_scaling_factor[qp];
4338 cmd->dw47.max_vmv_r =
4339 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4341 cmd->dw36.hme_combine_overlap = 1;
/* Validate the first backward (list1) reference; on failure the curbe is
 * unmapped before bailing out (the early-return line is elided here). */
4343 //check is_fwd_frame_short_term_ref
4344 surface_id = slice_param->RefPicList1[0].picture_id;
4345 obj_surface = SURFACE(surface_id);
4347 WARN_ONCE("Invalid backward reference frame\n");
4349 i965_gpe_context_unmap_curbe(gpe_context);
4352 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4354 cmd->dw36.num_ref_idx_l0_minus_one =
4355 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4357 cmd->dw36.num_ref_idx_l1_minus_one =
4358 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4360 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
/* Temporal distance scale factors for B-frame direct/weighted prediction. */
4362 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4371 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372 if (cmd->dw34.enable_direct_bias_adjustment) {
4373 cmd->dw7.non_skip_mode_added = 1;
4374 cmd->dw7.non_skip_zmv_added = 1;
4377 cmd->dw34.enable_global_motion_bias_adjustment =
4378 avc_state->global_motion_bias_adjustment_enable;
4379 if (avc_state->global_motion_bias_adjustment_enable)
4380 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4382 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383 cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
/* Keep driver state in sync with the value the init tables programmed. */
4386 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4388 if (avc_state->rolling_intra_refresh_enable) {
4390 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4393 cmd->dw34.widi_intra_refresh_en = 0;
4395 cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396 cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4398 /* Fixme: Skipped ROI stuffs for now */
/* FEI-specific controls: external MV predictors, per-MB QP/input, and the
 * distortion output surface toggle. */
4400 /* r64: FEI specific fields */
4401 cmd->dw64.fei_enable = 1;
4402 cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403 if (fei_param->distortion != VA_INVALID_ID)
4404 cmd->dw64.vme_distortion_output_enable = 1;
4405 cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406 cmd->dw64.mb_input_enable = fei_param->mb_input;
4408 // FEI mode is disabled when external MVP is available
4409 if (fei_param->mv_predictor_enable)
4410 cmd->dw64.fei_mode = 0;
4412 cmd->dw64.fei_mode = 1;
/* Surface binding-table indices consumed by the MBENC kernel; must match
 * the order used in gen9_avc_fei_send_surface_mbenc(). */
4414 cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415 cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416 cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417 cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418 cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419 cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420 cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421 cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422 cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423 cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424 cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425 cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426 cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427 cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428 cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429 cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430 cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431 cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432 cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433 cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434 cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435 cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436 cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind every surface the (non-FEI) MBENC kernel reads or writes to the GPE
 * context's binding table: PAK object / MV data outputs, current and
 * reference YUV planes (VME surfaces), HME MV/distortion inputs, and the
 * optional BRC, MBQP, slice-map, MAD, SFD and workaround buffers.
 *
 * NOTE(review): this extract is elided (the leading numbers on each line
 * are residual file line numbers and they jump) -- early-return lines,
 * braces and most i965_add_*_gpe_surface argument lists are not visible.
 */
4443 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
4444 struct encode_state *encode_state,
4445 struct i965_gpe_context *gpe_context,
4446 struct intel_encoder_context *encoder_context,
4449 struct i965_driver_data *i965 = i965_driver_data(ctx);
4450 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4451 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4452 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4453 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4454 struct object_surface *obj_surface;
4455 struct gen9_surface_avc *avc_priv_surface;
4456 struct i965_gpe_resource *gpe_resource;
4457 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
4458 VASurfaceID surface_id;
4459 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
4460 unsigned int size = 0;
4461 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
4462 generic_state->frame_height_in_mbs;
4464 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4465 unsigned char is_g95 = 0;
/* Platform split: SKL/BXT use the gen9 binding-table layout, while
 * KBL/CFL-era parts (and GEN10/GLK) use the gen9.5 layout; the assignments
 * to is_g95 are elided from this view. */
4467 if (IS_SKL(i965->intel.device_info) ||
4468 IS_BXT(i965->intel.device_info))
4470 else if (IS_KBL(i965->intel.device_info) ||
4471 IS_GEN10(i965->intel.device_info) ||
4472 IS_GLK(i965->intel.device_info))
4475 obj_surface = encode_state->reconstructed_object;
4477 if (!obj_surface || !obj_surface->private_data)
4479 avc_priv_surface = obj_surface->private_data;
/* PAK object output: 16 DWords per macroblock. */
4481 /*pak obj command buffer output*/
4482 size = frame_mb_size * 16 * 4;
4483 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4484 i965_add_buffer_gpe_surface(ctx,
4490 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
/* MV data output: 32 DWords per macroblock. */
4492 /*mv data buffer output*/
4493 size = frame_mb_size * 32 * 4;
4494 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4495 i965_add_buffer_gpe_surface(ctx,
4501 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
/* Current source picture: the I-frame-distortion path uses the 4x-downscaled
 * surface, the normal path the raw input YUV.  Bound as Y (R8) + UV (R16). */
4503 /*input current YUV surface, current input Y/UV object*/
4504 if (mbenc_i_frame_dist_in_use) {
4505 obj_surface = encode_state->reconstructed_object;
4506 if (!obj_surface || !obj_surface->private_data)
4508 avc_priv_surface = obj_surface->private_data;
4509 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4511 obj_surface = encode_state->input_yuv_object;
4513 i965_add_2d_gpe_surface(ctx,
4518 I965_SURFACEFORMAT_R8_UNORM,
4519 GEN9_AVC_MBENC_CURR_Y_INDEX);
4521 i965_add_2d_gpe_surface(ctx,
4526 I965_SURFACEFORMAT_R16_UINT,
4527 GEN9_AVC_MBENC_CURR_UV_INDEX);
/* 4x HME motion-vector and distortion inputs; on GEN8 they are only bound
 * for inter frames. */
4529 if (generic_state->hme_enabled) {
4531 if (!IS_GEN8(i965->intel.device_info)) {
4532 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4533 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4536 I965_SURFACEFORMAT_R8_UNORM,
4537 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4538 /* memv distortion input*/
4539 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4540 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4543 I965_SURFACEFORMAT_R8_UNORM,
4544 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4545 } else if (generic_state->frame_type != SLICE_TYPE_I) {
4546 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4547 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4550 I965_SURFACEFORMAT_R8_UNORM,
4551 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4552 /* memv distortion input*/
4553 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4554 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4557 I965_SURFACEFORMAT_R8_UNORM,
4558 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
/* MBBRC constant LUT: 16 DWords per QP. */
4562 /*mbbrc const data_buffer*/
4563 if (param->mb_const_data_buffer_in_use) {
4564 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
4565 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
4566 i965_add_buffer_gpe_surface(ctx,
4572 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
/* Per-MB QP surface: app-supplied MBQP data if enabled, otherwise the
 * BRC-generated per-MB QP surface. */
4576 /*mb qp data_buffer*/
4577 if (param->mb_qp_buffer_in_use) {
4578 if (avc_state->mb_qp_data_enable)
4579 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
4581 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
4582 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4585 I965_SURFACEFORMAT_R8_UNORM,
4586 GEN9_AVC_MBENC_MBQP_INDEX);
/* Current picture as VME surface (slot 0 of the inter-prediction pairs). */
4589 /*input current YUV surface, current input Y/UV object*/
4590 if (mbenc_i_frame_dist_in_use) {
4591 obj_surface = encode_state->reconstructed_object;
4592 if (!obj_surface || !obj_surface->private_data)
4594 avc_priv_surface = obj_surface->private_data;
4595 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4597 obj_surface = encode_state->input_yuv_object;
4599 i965_add_adv_gpe_surface(ctx, gpe_context,
4601 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
/* List0 (forward) reference pictures, interleaved into the odd slots
 * after the current picture. */
4602 /*input ref YUV surface*/
4603 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4604 surface_id = slice_param->RefPicList0[i].picture_id;
4605 obj_surface = SURFACE(surface_id);
4606 if (!obj_surface || !obj_surface->private_data)
4609 i965_add_adv_gpe_surface(ctx, gpe_context,
4611 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
/* Current picture again for the list1 (backward) VME slot group. */
4613 /*input current YUV surface, current input Y/UV object*/
4614 if (mbenc_i_frame_dist_in_use) {
4615 obj_surface = encode_state->reconstructed_object;
4616 if (!obj_surface || !obj_surface->private_data)
4618 avc_priv_surface = obj_surface->private_data;
4619 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4621 obj_surface = encode_state->input_yuv_object;
4623 i965_add_adv_gpe_surface(ctx, gpe_context,
4625 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
/* List1 reference: only ref 0 is supported; its MB-code/MV surfaces are
 * also exposed as the "forward frame" inputs for B-frame direct mode. */
4627 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4628 if (i > 0) break; // only one ref supported here for B frame
4629 surface_id = slice_param->RefPicList1[i].picture_id;
4630 obj_surface = SURFACE(surface_id);
4631 if (!obj_surface || !obj_surface->private_data)
4634 i965_add_adv_gpe_surface(ctx, gpe_context,
4636 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
4637 i965_add_adv_gpe_surface(ctx, gpe_context,
4639 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
4641 avc_priv_surface = obj_surface->private_data;
4642 /*pak obj command buffer output(mb code)*/
4643 size = frame_mb_size * 16 * 4;
4644 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4645 i965_add_buffer_gpe_surface(ctx,
4651 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
4653 /*mv data buffer output*/
4654 size = frame_mb_size * 32 * 4;
4655 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4656 i965_add_buffer_gpe_surface(ctx,
4662 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
4666 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
4667 i965_add_adv_gpe_surface(ctx, gpe_context,
4669 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
4674 /* BRC distortion data buffer for I frame*/
4675 if (mbenc_i_frame_dist_in_use) {
4676 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
4677 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4680 I965_SURFACEFORMAT_R8_UNORM,
4681 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
/* RefPicSelect output for the current picture when it will serve as a
 * reference later. */
4684 /* as ref frame ,update later RefPicSelect of Current Picture*/
4685 obj_surface = encode_state->reconstructed_object;
4686 avc_priv_surface = obj_surface->private_data;
4687 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
4688 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
4689 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4692 I965_SURFACEFORMAT_R8_UNORM,
4693 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
/* MB-stats slot: prefer the video-processing stats buffer when available,
 * otherwise the flatness-check surface; both branches target the same
 * binding-table index. */
4696 if (!IS_GEN8(i965->intel.device_info)) {
4697 if (param->mb_vproc_stats_enable) {
4698 /*mb status buffer input*/
4699 size = frame_mb_size * 16 * 4;
4700 gpe_resource = &(avc_ctx->res_mb_status_buffer);
4701 i965_add_buffer_gpe_surface(ctx,
4707 GEN9_AVC_MBENC_MB_STATS_INDEX);
4709 } else if (avc_state->flatness_check_enable) {
4710 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4711 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4714 I965_SURFACEFORMAT_R8_UNORM,
4715 GEN9_AVC_MBENC_MB_STATS_INDEX);
4717 } else if (avc_state->flatness_check_enable) {
4718 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4719 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4722 I965_SURFACEFORMAT_R8_UNORM,
4723 GEN9_AVC_MBENC_MB_STATS_INDEX);
/* MAD (mean absolute difference) output buffer, zeroed before each run. */
4726 if (param->mad_enable) {
4727 /*mad buffer input*/
4729 gpe_resource = &(avc_ctx->res_mad_data_buffer);
4730 i965_add_buffer_gpe_surface(ctx,
4736 GEN9_AVC_MBENC_MAD_DATA_INDEX);
4737 i965_zero_gpe_resource(gpe_resource);
4740 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
4741 if (avc_state->mbenc_brc_buffer_size > 0) {
4742 size = avc_state->mbenc_brc_buffer_size;
4743 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
4744 i965_add_buffer_gpe_surface(ctx,
4750 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
/* Slice map for arbitrary MB-count slices; regenerated every frame. */
4753 /*arbitrary num mbs in slice*/
4754 if (avc_state->arbitrary_num_mbs_in_slice) {
4755 /*slice surface input*/
4756 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
4757 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4760 I965_SURFACEFORMAT_R8_UNORM,
4761 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
4762 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
/* Force-non-skip MB map (g95 vs g9 slot differs). */
4765 /* BRC distortion data buffer for I frame */
4766 if (!mbenc_i_frame_dist_in_use) {
4767 if (avc_state->mb_disable_skip_map_enable) {
4768 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
4769 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4772 I965_SURFACEFORMAT_R8_UNORM,
4773 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
/* GEN8-only static-frame-detection output buffer.
 * NOTE(review): size = 128 / sizeof(unsigned long) is ABI-dependent (16 on
 * LP64, 32 on 32-bit) -- confirm the intended byte count. */
4775 if (IS_GEN8(i965->intel.device_info)) {
4776 if (avc_state->sfd_enable) {
4777 size = 128 / sizeof(unsigned long);
4778 gpe_resource = &(avc_ctx->res_sfd_output_buffer);
4779 i965_add_buffer_gpe_surface(ctx,
4785 GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
/* SFD cost table, chosen by frame type, bound for inter frames only. */
4789 if (avc_state->sfd_enable && generic_state->hme_enabled) {
4790 if (generic_state->frame_type == SLICE_TYPE_P) {
4791 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
4792 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4793 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
4795 if (generic_state->frame_type != SLICE_TYPE_I) {
4797 i965_add_buffer_gpe_surface(ctx,
4803 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815 struct encode_state *encode_state,
4816 struct i965_gpe_context *gpe_context,
4817 struct intel_encoder_context *encoder_context,
4820 struct i965_driver_data *i965 = i965_driver_data(ctx);
4821 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826 struct object_buffer *obj_buffer = NULL;
4827 struct buffer_store *buffer_store = NULL;
4828 struct object_surface *obj_surface = NULL;
4829 struct gen9_surface_avc *avc_priv_surface;
4830 struct i965_gpe_resource *gpe_resource;
4831 VASurfaceID surface_id;
4832 unsigned int size = 0;
4833 unsigned int frame_mb_nums;
4834 int i = 0, allocate_flag = 1;
4836 obj_surface = encode_state->reconstructed_object;
4837 if (!obj_surface || !obj_surface->private_data)
4839 avc_priv_surface = obj_surface->private_data;
4841 frame_mb_nums = generic_state->frame_width_in_mbs *
4842 generic_state->frame_height_in_mbs;
4843 fei_param = avc_state->fei_framectl_param;
4845 assert(fei_param != NULL);
4847 /* res_mb_code_surface for MB code */
4848 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4849 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851 if (fei_param->mb_code_data != VA_INVALID_ID) {
4852 obj_buffer = BUFFER(fei_param->mb_code_data);
4853 assert(obj_buffer != NULL);
4854 buffer_store = obj_buffer->buffer_store;
4855 assert(size <= buffer_store->bo->size);
4856 i965_dri_object_to_buffer_gpe_resource(
4857 &avc_priv_surface->res_mb_code_surface,
4860 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861 &avc_priv_surface->res_mb_code_surface,
4862 ALIGN(size, 0x1000),
4864 assert(allocate_flag != 0);
4867 /* res_mv_data_surface for MV data */
4868 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871 if (fei_param->mv_data != VA_INVALID_ID) {
4872 obj_buffer = BUFFER(fei_param->mv_data);
4873 assert(obj_buffer != NULL);
4874 buffer_store = obj_buffer->buffer_store;
4875 assert(size <= buffer_store->bo->size);
4876 i965_dri_object_to_buffer_gpe_resource(
4877 &avc_priv_surface->res_mv_data_surface,
4880 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881 &avc_priv_surface->res_mv_data_surface,
4882 ALIGN(size, 0x1000),
4884 assert(allocate_flag != 0);
4887 /* fei mb control data surface */
4888 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4889 if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890 assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891 obj_buffer = BUFFER(fei_param->mb_ctrl);
4892 assert(obj_buffer != NULL);
4893 buffer_store = obj_buffer->buffer_store;
4894 assert(size <= buffer_store->bo->size);
4895 if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897 i965_dri_object_to_buffer_gpe_resource(
4898 &avc_priv_surface->res_fei_mb_cntrl_surface,
4902 /* fei mv predictor surface*/
4903 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904 if (fei_param->mv_predictor_enable &&
4905 (fei_param->mv_predictor != VA_INVALID_ID)) {
4906 obj_buffer = BUFFER(fei_param->mv_predictor);
4907 assert(obj_buffer != NULL);
4908 buffer_store = obj_buffer->buffer_store;
4909 assert(size <= buffer_store->bo->size);
4910 if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911 i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912 i965_dri_object_to_buffer_gpe_resource(
4913 &avc_priv_surface->res_fei_mv_predictor_surface,
4916 if (fei_param->mv_predictor_enable)
4917 assert(fei_param->mv_predictor != VA_INVALID_ID);
4920 /* fei vme distortion */
4921 size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922 if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923 i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924 if (fei_param->distortion != VA_INVALID_ID) {
4925 obj_buffer = BUFFER(fei_param->distortion);
4926 assert(obj_buffer != NULL);
4927 buffer_store = obj_buffer->buffer_store;
4928 assert(size <= buffer_store->bo->size);
4929 i965_dri_object_to_buffer_gpe_resource(
4930 &avc_priv_surface->res_fei_vme_distortion_surface,
4933 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934 &avc_priv_surface->res_fei_vme_distortion_surface,
4935 ALIGN(size, 0x1000),
4936 "fei vme distortion");
4937 assert(allocate_flag != 0);
4941 /* Fixme/Confirm: not sure why we need 3 byte padding here */
4942 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944 obj_buffer = BUFFER(fei_param->qp);
4945 assert(obj_buffer != NULL);
4946 buffer_store = obj_buffer->buffer_store;
4947 assert((size - 3) <= buffer_store->bo->size);
4948 if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950 i965_dri_object_to_buffer_gpe_resource(
4951 &avc_priv_surface->res_fei_mb_qp_surface,
4954 if (fei_param->mb_qp)
4955 assert(fei_param->qp != VA_INVALID_ID);
4958 /*==== pak obj command buffer output ====*/
4959 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961 i965_add_buffer_gpe_surface(ctx,
4967 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4970 /*=== mv data buffer output */
4971 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973 i965_add_buffer_gpe_surface(ctx,
4979 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4982 /* === current input Y (binding table offset = 3)=== */
4983 obj_surface = encode_state->input_yuv_object;
4984 i965_add_2d_gpe_surface(ctx,
4989 I965_SURFACEFORMAT_R8_UNORM,
4990 GEN9_AVC_MBENC_CURR_Y_INDEX);
4992 /* === current input UV === (binding table offset == 4)*/
4993 i965_add_2d_gpe_surface(ctx,
4998 I965_SURFACEFORMAT_R16_UINT,
4999 GEN9_AVC_MBENC_CURR_UV_INDEX);
5001 /* === input current YUV surface, (binding table offset == 15) === */
5002 i965_add_adv_gpe_surface(ctx, gpe_context,
5004 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5007 /*== input current YUV surface, (binding table offset == 32)*/
5008 i965_add_adv_gpe_surface(ctx, gpe_context,
5010 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5012 /* list 0 references */
5013 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5015 surface_id = slice_param->RefPicList0[i].picture_id;
5016 obj_surface = SURFACE(surface_id);
5017 if (!obj_surface || !obj_surface->private_data)
5019 i965_add_adv_gpe_surface(ctx, gpe_context,
5021 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5025 /* list 1 references */
5026 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027 if (i > 0) break; // only one ref supported here for B frame
5028 surface_id = slice_param->RefPicList1[i].picture_id;
5029 obj_surface = SURFACE(surface_id);
5030 if (!obj_surface || !obj_surface->private_data)
5033 i965_add_adv_gpe_surface(ctx, gpe_context,
5035 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5037 avc_priv_surface = obj_surface->private_data;
5038 /* mb code of Backward reference frame */
5039 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040 gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041 i965_add_buffer_gpe_surface(ctx,
5047 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5049 /* mv data of backward ref frame */
5050 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051 gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052 i965_add_buffer_gpe_surface(ctx,
5058 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5062 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063 i965_add_adv_gpe_surface(ctx, gpe_context,
5065 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5069 /* as ref frame ,update later RefPicSelect of Current Picture*/
5070 obj_surface = encode_state->reconstructed_object;
5071 avc_priv_surface = obj_surface->private_data;
5072 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5077 I965_SURFACEFORMAT_R8_UNORM,
5078 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5083 /* mb specific data, macroblock control parameters */
5084 if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085 (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087 gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088 i965_add_buffer_gpe_surface(ctx,
5094 GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5097 /* multi mv predictor surface */
5098 if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099 size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100 gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101 i965_add_buffer_gpe_surface(ctx,
5107 GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5111 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112 size = frame_mb_nums + 3;
5113 gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114 i965_add_buffer_gpe_surface(ctx,
5120 GEN9_AVC_MBENC_MBQP_INDEX);
5124 /*=== FEI distortion surface ====*/
5125 size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126 gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127 i965_add_buffer_gpe_surface(ctx,
5133 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
/*
 * Run the MBEnc kernel (or the BRC I-frame-distortion variant when
 * i_frame_dist_in_use is set): pick the kernel by kernel_mode / FEI /
 * frame type, build the mbenc_param, program CURBE and surfaces through
 * the generic_ctx callbacks, then launch a media-object walker whose
 * dependency pattern (26/45 degree) follows the frame type.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): sampled excerpt -- declarations of kernel_idx/mad_enable
 * and some braces fall on lines not visible here; the '¶m' tokens
 * below appear to be extraction mojibake for '&param'.
 */
5139 gen9_avc_kernel_mbenc(VADriverContextP ctx,
5140 struct encode_state *encode_state,
5141 struct intel_encoder_context *encoder_context,
5142 bool i_frame_dist_in_use)
5144 struct i965_driver_data *i965 = i965_driver_data(ctx);
5145 struct i965_gpe_table *gpe = &i965->gpe_table;
5146 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5147 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5148 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5149 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5150 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5152 struct i965_gpe_context *gpe_context;
5153 struct gpe_media_object_walker_parameter media_object_walker_param;
5154 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5155 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5156 int media_function = 0;
5158 unsigned int mb_const_data_buffer_in_use = 0;
5159 unsigned int mb_qp_buffer_in_use = 0;
5160 unsigned int brc_enabled = 0;
5161 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* '&& (0)' deliberately forces dirty-ROI off for now. */
5162 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
5163 struct mbenc_param param ;
5165 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
5167 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Decide which optional input buffers the kernel will consume. */
5169 mb_const_data_buffer_in_use =
5170 generic_state->mb_brc_enabled ||
5173 avc_state->mb_qp_data_enable ||
5174 avc_state->rolling_intra_refresh_enable;
5175 mb_qp_buffer_in_use =
5176 generic_state->mb_brc_enabled ||
5177 generic_state->brc_roi_enable ||
5178 avc_state->mb_qp_data_enable;
/* I-frame-distortion mode runs the BRC kernel on the 4x-downscaled frame. */
5180 if (mbenc_i_frame_dist_in_use) {
5181 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
5182 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
5183 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
5184 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
5188 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
5190 switch (generic_state->kernel_mode) {
5191 case INTEL_ENC_KERNEL_NORMAL : {
5192 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5193 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
5196 case INTEL_ENC_KERNEL_PERFORMANCE : {
5197 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
5198 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
5201 case INTEL_ENC_KERNEL_QUALITY : {
5202 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
5203 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* FEI overrides the kernel-mode selection above. */
5211 if (encoder_context->fei_enabled) {
5212 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5213 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
5216 if (generic_state->frame_type == SLICE_TYPE_P) {
5218 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5222 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
5223 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
5224 mad_enable = avc_state->mad_enable;
5225 brc_enabled = generic_state->brc_enabled;
5227 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
5230 memset(¶m, 0, sizeof(struct mbenc_param));
5232 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
5233 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
5234 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
5235 param.mad_enable = mad_enable;
5236 param.brc_enabled = brc_enabled;
5237 param.roi_enabled = roi_enable;
5239 if (avc_state->mb_status_supported) {
5240 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
/* Skip CURBE (re)programming when BRC-update already set it. */
5243 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5244 gpe->context_init(ctx, gpe_context);
5247 gpe->reset_binding_table(ctx, gpe_context);
5249 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5251 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, ¶m);
5254 /* MB brc const data buffer set up*/
5255 if (mb_const_data_buffer_in_use) {
5256 // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
5257 if (avc_state->lambda_table_enable)
5258 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
5260 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
5263 /*clear the mad buffer*/
5265 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
5268 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5270 gpe->setup_interface_data(ctx, gpe_context);
5273 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5275 kernel_walker_param.use_scoreboard = 1;
5276 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5277 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
/* I-frame-distortion walks with no MB dependency; normal encode picks a
 * 26- or 45-degree dependency pattern from the frame type. */
5278 if (mbenc_i_frame_dist_in_use) {
5279 kernel_walker_param.no_dependency = 1;
5281 switch (generic_state->frame_type) {
5283 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5286 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5289 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5290 if (!slice_param->direct_spatial_mv_pred_flag) {
5291 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5297 kernel_walker_param.no_dependency = 0;
5300 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5302 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5305 &media_object_walker_param);
5306 return VA_STATUS_SUCCESS;
5310 me kernel related function
/*
 * Fill the HME (hierarchical motion estimation) kernel CURBE for the
 * 4x / 16x / 32x stage given by curbe_param->hme_type: start from the
 * static init table, then patch per-stage factors (MV shift, previous-MV
 * read position, distortion write-out), frame geometry, QP and the
 * search-path table, and finally the fixed surface binding indices.
 *
 * NOTE(review): sampled excerpt -- scale_factor assignments and some
 * closing braces are on lines not visible here.
 */
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314 struct encode_state *encode_state,
5315 struct i965_gpe_context *gpe_context,
5316 struct intel_encoder_context *encoder_context,
5319 gen9_avc_me_curbe_data *curbe_cmd;
5320 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5324 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5326 struct me_param * curbe_param = (struct me_param *)param ;
5327 unsigned char use_mv_from_prev_step = 0;
5328 unsigned char write_distortions = 0;
5329 unsigned char qp_prime_y = 0;
/* P-frame search method is the default; B overrides it below. */
5330 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5331 unsigned char seach_table_idx = 0;
5332 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334 unsigned int scale_factor = 0;
5336 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Each HME level seeds from the next-coarser level when that level ran. */
5337 switch (curbe_param->hme_type) {
5338 case INTEL_ENC_HME_4x : {
5339 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340 write_distortions = 1;
5341 mv_shift_factor = 2;
5343 prev_mv_read_pos_factor = 0;
5346 case INTEL_ENC_HME_16x : {
5347 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348 write_distortions = 0;
5349 mv_shift_factor = 2;
5351 prev_mv_read_pos_factor = 1;
5354 case INTEL_ENC_HME_32x : {
5355 use_mv_from_prev_step = 0;
5356 write_distortions = 0;
5357 mv_shift_factor = 1;
5359 prev_mv_read_pos_factor = 0;
5366 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5371 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5374 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5376 curbe_cmd->dw3.sub_pel_mode = 3;
5377 if (avc_state->field_scaling_output_interleaved) {
5378 /*frame set to zero,field specified*/
5379 curbe_cmd->dw3.src_access = 0;
5380 curbe_cmd->dw3.ref_access = 0;
5381 curbe_cmd->dw7.src_field_polarity = 0;
5383 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5387 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388 curbe_cmd->dw6.write_distortions = write_distortions;
5389 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames: equal bi-weight, L1 ref count, and the B search table. */
5392 if (generic_state->frame_type == SLICE_TYPE_B) {
5393 curbe_cmd->dw1.bi_weight = 32;
5394 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395 me_method = gen9_avc_b_me_method[generic_state->preset];
5396 seach_table_idx = 1;
5399 if (generic_state->frame_type == SLICE_TYPE_P ||
5400 generic_state->frame_type == SLICE_TYPE_B)
5401 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5403 curbe_cmd->dw13.ref_streamin_cost = 5;
5404 curbe_cmd->dw13.roi_enable = 0;
5406 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Copy the 14-dword search path for the chosen (table, method) pair. */
5409 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Fixed binding-table indices consumed by the ME kernel. */
5411 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5419 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces for one HME stage (4x / 16x / 32x, selected by
 * curbe_param->hme_type): the stage's MV-data output buffer, the
 * next-coarser stage's MV data as input when that stage is enabled,
 * (4x only) BRC and ME distortion outputs, and the appropriately
 * downscaled current + L0/L1 reference YUV surfaces.  All downscaled
 * inputs are stored in the reconstructed object's private data.
 *
 * NOTE(review): sampled excerpt -- loop-variable declaration ('i'),
 * some call arguments, braces and break statements are on lines not
 * visible here.
 */
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425 struct encode_state *encode_state,
5426 struct i965_gpe_context *gpe_context,
5427 struct intel_encoder_context *encoder_context,
5430 struct i965_driver_data *i965 = i965_driver_data(ctx);
5432 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5437 struct object_surface *obj_surface, *input_surface;
5438 struct gen9_surface_avc *avc_priv_surface;
5439 struct i965_gpe_resource *gpe_resource;
5440 struct me_param * curbe_param = (struct me_param *)param ;
5442 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443 VASurfaceID surface_id;
5446 /* all scaled input surface stored in reconstructed_object*/
5447 obj_surface = encode_state->reconstructed_object;
5448 if (!obj_surface || !obj_surface->private_data)
5450 avc_priv_surface = obj_surface->private_data;
5453 switch (curbe_param->hme_type) {
5454 case INTEL_ENC_HME_4x : {
/* 4x MV data output */
5456 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5460 I965_SURFACEFORMAT_R8_UNORM,
5461 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 16x MV data feeds the 4x stage when 16x HME ran. */
5464 if (generic_state->b16xme_enabled) {
5465 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5469 I965_SURFACEFORMAT_R8_UNORM,
5470 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5472 /* brc distortion output*/
5473 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5477 I965_SURFACEFORMAT_R8_UNORM,
5478 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479 /* memv distortion output*/
5480 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5484 I965_SURFACEFORMAT_R8_UNORM,
5485 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486 /*input current down scaled YUV surface*/
5487 obj_surface = encode_state->reconstructed_object;
5488 avc_priv_surface = obj_surface->private_data;
5489 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490 i965_add_adv_gpe_surface(ctx, gpe_context,
5492 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493 /*input ref scaled YUV surface*/
5494 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495 surface_id = slice_param->RefPicList0[i].picture_id;
5496 obj_surface = SURFACE(surface_id);
5497 if (!obj_surface || !obj_surface->private_data)
5499 avc_priv_surface = obj_surface->private_data;
5501 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5503 i965_add_adv_gpe_surface(ctx, gpe_context,
5505 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
/* Re-bind the current downscaled picture for the backward direction. */
5508 obj_surface = encode_state->reconstructed_object;
5509 avc_priv_surface = obj_surface->private_data;
5510 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5512 i965_add_adv_gpe_surface(ctx, gpe_context,
5514 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5516 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517 surface_id = slice_param->RefPicList1[i].picture_id;
5518 obj_surface = SURFACE(surface_id);
5519 if (!obj_surface || !obj_surface->private_data)
5521 avc_priv_surface = obj_surface->private_data;
5523 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5525 i965_add_adv_gpe_surface(ctx, gpe_context,
5527 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5532 case INTEL_ENC_HME_16x : {
5533 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5537 I965_SURFACEFORMAT_R8_UNORM,
5538 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 32x MV data feeds the 16x stage when 32x HME ran. */
5540 if (generic_state->b32xme_enabled) {
5541 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5545 I965_SURFACEFORMAT_R8_UNORM,
5546 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5549 obj_surface = encode_state->reconstructed_object;
5550 avc_priv_surface = obj_surface->private_data;
5551 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552 i965_add_adv_gpe_surface(ctx, gpe_context,
5554 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5556 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557 surface_id = slice_param->RefPicList0[i].picture_id;
5558 obj_surface = SURFACE(surface_id);
5559 if (!obj_surface || !obj_surface->private_data)
5561 avc_priv_surface = obj_surface->private_data;
5563 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5565 i965_add_adv_gpe_surface(ctx, gpe_context,
5567 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5570 obj_surface = encode_state->reconstructed_object;
5571 avc_priv_surface = obj_surface->private_data;
5572 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5574 i965_add_adv_gpe_surface(ctx, gpe_context,
5576 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5578 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579 surface_id = slice_param->RefPicList1[i].picture_id;
5580 obj_surface = SURFACE(surface_id);
5581 if (!obj_surface || !obj_surface->private_data)
5583 avc_priv_surface = obj_surface->private_data;
5585 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5587 i965_add_adv_gpe_surface(ctx, gpe_context,
5589 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5593 case INTEL_ENC_HME_32x : {
/* Coarsest stage: no coarser MV input exists. */
5594 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5598 I965_SURFACEFORMAT_R8_UNORM,
5599 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5601 obj_surface = encode_state->reconstructed_object;
5602 avc_priv_surface = obj_surface->private_data;
5603 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604 i965_add_adv_gpe_surface(ctx, gpe_context,
5606 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5608 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609 surface_id = slice_param->RefPicList0[i].picture_id;
5610 obj_surface = SURFACE(surface_id);
5611 if (!obj_surface || !obj_surface->private_data)
5613 avc_priv_surface = obj_surface->private_data;
5615 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5617 i965_add_adv_gpe_surface(ctx, gpe_context,
5619 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5622 obj_surface = encode_state->reconstructed_object;
5623 avc_priv_surface = obj_surface->private_data;
5624 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5626 i965_add_adv_gpe_surface(ctx, gpe_context,
5628 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5630 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631 surface_id = slice_param->RefPicList1[i].picture_id;
5632 obj_surface = SURFACE(surface_id);
5633 if (!obj_surface || !obj_surface->private_data)
5635 avc_priv_surface = obj_surface->private_data;
5637 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5639 i965_add_adv_gpe_surface(ctx, gpe_context,
5641 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/*
 * Run one HME kernel pass: map hme_type to the media function and scale
 * factor, select the P or B ME kernel (I frames must not reach here),
 * program CURBE and surfaces via the generic_ctx callbacks, then launch
 * a dependency-free media-object walker over the downscaled MB grid.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): sampled excerpt -- declarations of kernel_idx/hme_type,
 * the scale_factor assignments and some braces are on lines not visible
 * here; the '¶m' tokens appear to be extraction mojibake for '&param'.
 */
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653 struct encode_state *encode_state,
5654 struct intel_encoder_context *encoder_context,
5657 struct i965_driver_data *i965 = i965_driver_data(ctx);
5658 struct i965_gpe_table *gpe = &i965->gpe_table;
5659 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5664 struct i965_gpe_context *gpe_context;
5665 struct gpe_media_object_walker_parameter media_object_walker_param;
5666 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668 int media_function = 0;
5670 struct me_param param ;
5671 unsigned int scale_factor = 0;
5674 case INTEL_ENC_HME_4x : {
5675 media_function = INTEL_MEDIA_STATE_4X_ME;
5679 case INTEL_ENC_HME_16x : {
5680 media_function = INTEL_MEDIA_STATE_16X_ME;
5684 case INTEL_ENC_HME_32x : {
5685 media_function = INTEL_MEDIA_STATE_32X_ME;
5694 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5697 /* I frame should not come here.*/
5698 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5701 gpe->context_init(ctx, gpe_context);
5702 gpe->reset_binding_table(ctx, gpe_context);
5705 memset(¶m, 0, sizeof(param));
5706 param.hme_type = hme_type;
5707 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, ¶m);
5710 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5712 gpe->setup_interface_data(ctx, gpe_context);
5714 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715 /* the scaling is based on 8x8 blk level */
5716 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
/* ME threads are independent -- no scoreboard dependency needed. */
5718 kernel_walker_param.no_dependency = 1;
5720 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5722 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5725 &media_object_walker_param);
5727 return VA_STATUS_SUCCESS;
/*
 * Fill the weighted-prediction (WP) kernel CURBE: zero it, load the
 * default luma weight/offset from list 0 or list 1 entry 0 of the slice
 * header (selected by curbe_param->ref_list_idx), and set the fixed
 * input/output surface binding indices.
 *
 * NOTE(review): sampled excerpt -- the 'void *param' tail of the
 * signature and the else keyword between the two weight branches fall
 * on lines not visible here.
 */
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735 struct encode_state *encode_state,
5736 struct i965_gpe_context *gpe_context,
5737 struct intel_encoder_context *encoder_context,
5740 gen9_avc_wp_curbe_data *cmd;
5741 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744 struct wp_param * curbe_param = (struct wp_param *)param;
5746 cmd = i965_gpe_context_map_curbe(gpe_context);
5750 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
/* ref_list_idx != 0 selects list 1, otherwise list 0. */
5751 if (curbe_param->ref_list_idx) {
5752 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5755 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5759 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5762 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the weighted-prediction kernel surfaces: record whether the first
 * reference of the selected list (L0 or L1, per curbe_param->ref_list_idx)
 * is usable in avc_state->weighted_ref_l{0,1}_enable, then add the
 * reference input surface and the per-list WP output surface to the
 * binding table.
 *
 * NOTE(review): sampled excerpt -- the else branches, the 'void *param'
 * signature tail and some fallback assignments of obj_surface are on
 * lines not visible here.
 */
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768 struct encode_state *encode_state,
5769 struct i965_gpe_context *gpe_context,
5770 struct intel_encoder_context *encoder_context,
5773 struct i965_driver_data *i965 = i965_driver_data(ctx);
5774 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777 struct wp_param * curbe_param = (struct wp_param *)param;
5778 struct object_surface *obj_surface;
5779 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780 VASurfaceID surface_id;
5782 if (curbe_param->ref_list_idx) {
5783 surface_id = slice_param->RefPicList1[0].picture_id;
5784 obj_surface = SURFACE(surface_id);
/* Missing/uninitialized ref disables weighted prediction for this list. */
5785 if (!obj_surface || !obj_surface->private_data)
5786 avc_state->weighted_ref_l1_enable = 0;
5788 avc_state->weighted_ref_l1_enable = 1;
5790 surface_id = slice_param->RefPicList0[0].picture_id;
5791 obj_surface = SURFACE(surface_id);
5792 if (!obj_surface || !obj_surface->private_data)
5793 avc_state->weighted_ref_l0_enable = 0;
5795 avc_state->weighted_ref_l0_enable = 1;
/* Fall back to the first reference object when the list entry is unusable. */
5798 obj_surface = encode_state->reference_objects[0];
5801 i965_add_adv_gpe_surface(ctx, gpe_context,
5803 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5805 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806 i965_add_adv_gpe_surface(ctx, gpe_context,
5808 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814 struct encode_state *encode_state,
5815 struct intel_encoder_context *encoder_context,
5816 unsigned int list1_in_use)
5818 struct i965_driver_data *i965 = i965_driver_data(ctx);
5819 struct i965_gpe_table *gpe = &i965->gpe_table;
5820 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5825 struct i965_gpe_context *gpe_context;
5826 struct gpe_media_object_walker_parameter media_object_walker_param;
5827 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828 int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829 struct wp_param param;
5831 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5833 gpe->context_init(ctx, gpe_context);
5834 gpe->reset_binding_table(ctx, gpe_context);
5836 memset(¶m, 0, sizeof(param));
5837 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5839 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m);
5842 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5844 gpe->setup_interface_data(ctx, gpe_context);
5846 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847 /* the scaling is based on 8x8 blk level */
5848 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850 kernel_walker_param.no_dependency = 1;
5852 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5854 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5857 &media_object_walker_param);
5859 return VA_STATUS_SUCCESS;
/**************** SFD (static frame detection) related functions ****************/
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868 struct encode_state *encode_state,
5869 struct i965_gpe_context *gpe_context,
5870 struct intel_encoder_context *encoder_context,
5873 gen9_avc_sfd_curbe_data *cmd;
5874 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5879 cmd = i965_gpe_context_map_curbe(gpe_context);
5883 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5885 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887 cmd->dw0.stream_in_type = 7 ;
5888 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
5889 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890 cmd->dw0.vdenc_mode_disable = 1 ;
5892 cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5896 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
5899 cmd->dw3.large_mv_threshold = 128 ;
5900 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901 cmd->dw5.zmv_threshold = 4 ;
5902 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903 cmd->dw7.min_dist_threshold = 10 ;
5905 if (generic_state->frame_type == SLICE_TYPE_P) {
5906 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5908 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5912 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
5914 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5920 i965_gpe_context_unmap_curbe(gpe_context);
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926 struct encode_state *encode_state,
5927 struct i965_gpe_context *gpe_context,
5928 struct intel_encoder_context *encoder_context,
5931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933 struct i965_gpe_resource *gpe_resource;
5936 /*HME mv data surface memv output 4x*/
5937 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5941 I965_SURFACEFORMAT_R8_UNORM,
5942 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5944 /* memv distortion */
5945 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5949 I965_SURFACEFORMAT_R8_UNORM,
5950 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5953 gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954 i965_add_buffer_gpe_surface(ctx,
5960 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966 struct encode_state *encode_state,
5967 struct intel_encoder_context *encoder_context)
5969 struct i965_driver_data *i965 = i965_driver_data(ctx);
5970 struct i965_gpe_table *gpe = &i965->gpe_table;
5971 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5975 struct i965_gpe_context *gpe_context;
5976 struct gpe_media_object_parameter media_object_param;
5977 struct gpe_media_object_inline_data media_object_inline_data;
5978 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5981 gpe->context_init(ctx, gpe_context);
5982 gpe->reset_binding_table(ctx, gpe_context);
5985 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5988 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5990 gpe->setup_interface_data(ctx, gpe_context);
5992 memset(&media_object_param, 0, sizeof(media_object_param));
5993 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994 media_object_param.pinline_data = &media_object_inline_data;
5995 media_object_param.inline_size = sizeof(media_object_inline_data);
5997 gen9_avc_run_kernel_media_object(ctx, encoder_context,
6000 &media_object_param);
6002 return VA_STATUS_SUCCESS;
/**************** PreEnc Scaling *************************************/
/* Function to run preenc scaling: gen9_avc_preenc_kernel_scaling().
 * The function that sets the preenc scaling curbe is the same one used
 * for AVC encode: gen95_avc_set_curbe_scaling4x().
 * The function that sends buffer/surface resources is the same one used
 * for AVC encode: gen9_avc_send_surface_scaling().
 */
6013 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6014 struct encode_state *encode_state,
6015 struct intel_encoder_context *encoder_context,
6017 int scale_surface_type)
6019 struct i965_driver_data *i965 = i965_driver_data(ctx);
6020 struct i965_gpe_table *gpe = &i965->gpe_table;
6021 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6022 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6023 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6024 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6025 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6026 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6027 VAStatsStatisticsParameter *stat_param = NULL;
6028 struct i965_gpe_context *gpe_context;
6029 struct scaling_param surface_param;
6030 struct object_surface *obj_surface = NULL;
6031 struct gpe_media_object_walker_parameter media_object_walker_param;
6032 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6033 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6034 int media_function = 0;
6036 int enable_statistics_output;
6038 stat_param_h264 = avc_state->stat_param;
6039 assert(stat_param_h264);
6040 stat_param = &stat_param_h264->stats_params;
6041 enable_statistics_output = !stat_param_h264->disable_statistics_output;
6043 memset(&surface_param, 0, sizeof(struct scaling_param));
6044 media_function = INTEL_MEDIA_STATE_4X_SCALING;
6045 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6046 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6047 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6049 surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6050 surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6051 surface_param.output_frame_width = generic_state->frame_width_4x;
6052 surface_param.output_frame_height = generic_state->frame_height_4x;
6053 surface_param.use_4x_scaling = 1 ;
6054 surface_param.use_16x_scaling = 0 ;
6055 surface_param.use_32x_scaling = 0 ;
6056 surface_param.enable_mb_flatness_check = enable_statistics_output;
6057 surface_param.enable_mb_variance_output = enable_statistics_output;
6058 surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6059 surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
6061 switch (scale_surface_type) {
6064 surface_param.input_surface = encode_state->input_yuv_object ;
6065 surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6067 if (enable_statistics_output) {
6068 surface_param.pres_mbv_proc_stat_buffer =
6069 &avc_ctx->preproc_stat_data_out_buffer;
6070 surface_param.mbv_proc_stat_enabled = 1;
6072 surface_param.mbv_proc_stat_enabled = 0;
6073 surface_param.pres_mbv_proc_stat_buffer = NULL;
6077 case SCALE_PAST_REF_PIC:
6078 obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6079 assert(obj_surface);
6080 surface_param.input_surface = obj_surface;
6081 surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6083 if (stat_param->past_ref_stat_buf) {
6084 surface_param.pres_mbv_proc_stat_buffer =
6085 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6086 surface_param.mbv_proc_stat_enabled = 1;
6088 surface_param.mbv_proc_stat_enabled = 0;
6089 surface_param.pres_mbv_proc_stat_buffer = NULL;
6093 case SCALE_FUTURE_REF_PIC:
6095 obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6096 assert(obj_surface);
6097 surface_param.input_surface = obj_surface;
6098 surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6100 if (stat_param->future_ref_stat_buf) {
6101 surface_param.pres_mbv_proc_stat_buffer =
6102 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6103 surface_param.mbv_proc_stat_enabled = 1;
6105 surface_param.mbv_proc_stat_enabled = 0;
6106 surface_param.pres_mbv_proc_stat_buffer = NULL;
6113 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6115 gpe->context_init(ctx, gpe_context);
6116 gpe->reset_binding_table(ctx, gpe_context);
6118 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6120 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6121 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6123 /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6124 * VAStatsStatisticsH264 will be used to store the output. */
6125 surface_param.enable_mb_flatness_check = 0;
6126 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6128 /* setup the interface data */
6129 gpe->setup_interface_data(ctx, gpe_context);
6131 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6132 /* the scaling is based on 8x8 blk level */
6133 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6134 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6135 kernel_walker_param.no_dependency = 1;
6137 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6139 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6142 &media_object_walker_param);
6144 return VA_STATUS_SUCCESS;
/**************** PreEnc HME *************************************/
/* The function that runs preenc HME is the same one used in AVC encode:
 *     gen9_avc_kernel_me()
 * Function to set the preenc HME curbe: gen9_avc_preenc_set_curbe_me()
 * Function to send HME buffers/surfaces: gen9_avc_preenc_send_surface_me()
 */
6154 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6155 struct encode_state *encode_state,
6156 struct i965_gpe_context *gpe_context,
6157 struct intel_encoder_context *encoder_context,
6160 gen9_avc_fei_me_curbe_data *curbe_cmd;
6161 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6162 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6163 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6164 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6165 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6167 struct me_param * curbe_param = (struct me_param *)param ;
6168 unsigned char use_mv_from_prev_step = 0;
6169 unsigned char write_distortions = 0;
6170 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6171 unsigned char seach_table_idx = 0;
6172 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6173 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6174 unsigned int scale_factor = 0;
6176 switch (curbe_param->hme_type) {
6177 case INTEL_ENC_HME_4x:
6178 use_mv_from_prev_step = 0;
6179 write_distortions = 0;
6180 mv_shift_factor = 2;
6182 prev_mv_read_pos_factor = 0;
6189 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
6193 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6194 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6196 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6198 curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6199 if (avc_state->field_scaling_output_interleaved) {
6200 /*frame set to zero,field specified*/
6201 curbe_cmd->dw3.src_access = 0;
6202 curbe_cmd->dw3.ref_access = 0;
6203 curbe_cmd->dw7.src_field_polarity = 0;
6205 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6206 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6207 curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6209 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6210 curbe_cmd->dw6.write_distortions = write_distortions;
6211 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6212 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
6214 if (generic_state->frame_type == SLICE_TYPE_B) {
6215 curbe_cmd->dw1.bi_weight = 32;
6216 curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6217 me_method = gen9_avc_b_me_method[generic_state->preset];
6218 seach_table_idx = 1;
6221 if (generic_state->frame_type == SLICE_TYPE_P ||
6222 generic_state->frame_type == SLICE_TYPE_B)
6223 curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6225 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6226 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6228 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
6230 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6231 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6232 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6233 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6234 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6235 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6236 curbe_cmd->dw38.reserved = 0;
6238 i965_gpe_context_unmap_curbe(gpe_context);
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244 struct encode_state *encode_state,
6245 struct i965_gpe_context *gpe_context,
6246 struct intel_encoder_context *encoder_context,
6249 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254 struct object_surface *input_surface;
6255 struct i965_gpe_resource *gpe_resource;
6256 struct me_param * curbe_param = (struct me_param *)param ;
6259 /* PreEnc Only supports 4xme */
6260 assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6262 switch (curbe_param->hme_type) {
6263 case INTEL_ENC_HME_4x : {
6265 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6269 I965_SURFACEFORMAT_R8_UNORM,
6270 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6272 /* memv distortion output*/
6273 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6277 I965_SURFACEFORMAT_R8_UNORM,
6278 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6280 /* brc distortion output*/
6281 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6285 I965_SURFACEFORMAT_R8_UNORM,
6286 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6288 /* input past ref scaled YUV surface*/
6289 for (i = 0; i < stat_param->num_past_references; i++) {
6290 /*input current down scaled YUV surface for forward refef */
6291 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292 i965_add_adv_gpe_surface(ctx, gpe_context,
6294 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6296 input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297 i965_add_adv_gpe_surface(ctx, gpe_context,
6299 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6302 /* input future ref scaled YUV surface*/
6303 for (i = 0; i < stat_param->num_future_references; i++) {
6304 /*input current down scaled YUV surface for backward ref */
6305 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306 i965_add_adv_gpe_surface(ctx, gpe_context,
6308 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6310 input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311 i965_add_adv_gpe_surface(ctx, gpe_context,
6313 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/**************** PreEnc PreProc *************************************/
/* Function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
 * Function to set the preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
 * Function to send preproc buffers/surfaces: gen9_avc_preenc_send_surface_preproc()
 */
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331 struct encode_state *encode_state,
6332 struct i965_gpe_context *gpe_context,
6333 struct intel_encoder_context *encoder_context,
6336 gen9_avc_preproc_curbe_data *cmd;
6337 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342 unsigned char me_method = 0;
6343 unsigned int table_idx = 0;
6344 int ref_width, ref_height, len_sp;
6345 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347 unsigned int preset = generic_state->preset;
6349 cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6352 memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6354 switch (generic_state->frame_type) {
6356 memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357 sizeof(gen9_avc_preproc_curbe_data));
6360 memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361 sizeof(gen9_avc_preproc_curbe_data));
6364 memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365 sizeof(gen9_avc_preproc_curbe_data));
6370 /* 4 means full search, 6 means diamand search */
6371 me_method = (stat_param_h264->search_window == 5) ||
6372 (stat_param_h264->search_window == 8) ? 4 : 6;
6374 ref_width = stat_param_h264->ref_width;
6375 ref_height = stat_param_h264->ref_height;
6376 len_sp = stat_param_h264->len_sp;
6377 /* If there is a serch_window, discard user provided ref_width, ref_height
6378 * and search_path length */
6379 switch (stat_param_h264->search_window) {
6381 /* not use predefined search window, there should be a search_path input */
6382 if ((stat_param_h264->search_path != 0) &&
6383 (stat_param_h264->search_path != 1) &&
6384 (stat_param_h264->search_path != 2)) {
6385 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
6388 /* 4 means full search, 6 means diamand search */
6389 me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391 WARN_ONCE("Invalid input ref_width/ref_height in"
6392 "SearchWindow=0 case! \n");
6398 /* Tiny - 4 SUs 24x24 window */
6405 /* Small - 9 SUs 28x28 window */
6411 /* Diamond - 16 SUs 48x40 window */
6417 /* Large Diamond - 32 SUs 48x40 window */
6423 /* Exhaustive - 48 SUs 48x40 window */
6429 /* Diamond - 16 SUs 64x32 window */
6435 /* Large Diamond - 32 SUs 64x32 window */
6441 /* Exhaustive - 48 SUs 64x32 window */
6451 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6453 CLIP(ref_width, 4, 32);
6454 CLIP(ref_height, 4, 32);
6455 } else if (is_pframe) {
6456 CLIP(ref_width, 4, 64);
6457 CLIP(ref_height, 4, 32);
6460 cmd->dw0.adaptive_enable =
6461 cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462 cmd->dw2.max_len_sp = len_sp;
6463 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465 cmd->dw3.src_access =
6466 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6468 if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469 cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6471 cmd->dw3.ft_enable = 0;
6473 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474 cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475 cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476 cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477 cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478 cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479 cmd->dw4.hme_enable = generic_state->hme_enabled;
6480 cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481 cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6483 cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484 (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6486 cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487 cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6489 cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490 cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6492 cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6495 memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6497 /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498 memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6500 /* search path tables */
6501 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6504 if (stat_param_h264->intra_part_mask == 0x07)
6505 cmd->dw31.intra_compute_type = 3;
6507 cmd->dw38.ref_threshold = 400;
6508 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6510 if (generic_state->frame_type == SLICE_TYPE_I) {
6511 cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512 cmd->dw36.hme_combine_overlap = 0;
6513 } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515 cmd->dw3.bme_disable_fbr = 1;
6516 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518 cmd->dw7.non_skip_zmv_added = 1;
6519 cmd->dw7.non_skip_mode_added = 1;
6520 cmd->dw7.skip_center_mask = 1;
6521 cmd->dw32.max_vmv_r =
6522 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523 cmd->dw36.hme_combine_overlap = 1;
6525 } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6527 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528 cmd->dw3.search_ctrl = 0;
6529 cmd->dw3.skip_type = 1;
6530 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532 cmd->dw7.skip_center_mask = 0xff;
6533 cmd->dw32.max_vmv_r =
6534 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535 cmd->dw36.hme_combine_overlap = 1;
6538 cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539 cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540 cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541 cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542 cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543 cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544 cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545 cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546 cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6548 i965_gpe_context_unmap_curbe(gpe_context);
6552 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6553 struct encode_state *encode_state,
6554 struct i965_gpe_context *gpe_context,
6555 struct intel_encoder_context *encoder_context,
6558 struct i965_driver_data *i965 = i965_driver_data(ctx);
6559 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6560 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6561 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6562 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6563 struct object_surface *obj_surface;
6564 struct i965_gpe_resource *gpe_resource;
6565 VASurfaceID surface_id;
6566 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6567 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6568 unsigned int size = 0, frame_mb_nums = 0;
6570 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6572 /* input yuv surface, Y index */
6573 obj_surface = encode_state->input_yuv_object;
6574 i965_add_2d_gpe_surface(ctx,
6579 I965_SURFACEFORMAT_R8_UNORM,
6580 GEN9_AVC_PREPROC_CURR_Y_INDEX);
6582 /* input yuv surface, UV index */
6583 i965_add_2d_gpe_surface(ctx,
6588 I965_SURFACEFORMAT_R16_UINT,
6589 GEN9_AVC_MBENC_CURR_UV_INDEX);
6592 if (generic_state->hme_enabled) {
6593 /* HME mv data buffer */
6594 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6595 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6598 I965_SURFACEFORMAT_R8_UNORM,
6599 GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6602 /* mv predictor buffer */
6603 if (stat_param_h264->mv_predictor_ctrl) {
6604 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6605 gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6606 i965_add_buffer_gpe_surface(ctx,
6612 GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
6616 if (stat_param_h264->mb_qp) {
6617 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6618 gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6619 i965_add_buffer_gpe_surface(ctx,
6625 GEN9_AVC_PREPROC_MBQP_INDEX);
6627 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6628 size = 16 * AVC_QP_MAX * 4;
6629 i965_add_buffer_gpe_surface(ctx,
6635 GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6639 /* mv data output buffer */
6640 if (!stat_param_h264->disable_mv_output) {
6641 gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6642 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6643 i965_add_buffer_gpe_surface(ctx,
6649 GEN9_AVC_PREPROC_MV_DATA_INDEX);
6652 /* statistics output buffer */
6653 if (!stat_param_h264->disable_statistics_output) {
6654 gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6655 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6656 i965_add_buffer_gpe_surface(ctx,
6662 GEN9_AVC_PREPROC_MB_STATS_INDEX);
6666 obj_surface = encode_state->input_yuv_object;
6667 i965_add_2d_gpe_surface(ctx,
6672 I965_SURFACEFORMAT_R8_UNORM,
6673 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6675 /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
6676 obj_surface = encode_state->input_yuv_object;
6677 i965_add_2d_gpe_surface(ctx,
6682 I965_SURFACEFORMAT_R8_UNORM,
6683 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6685 /* vme forward ref */
6686 /* Only supports one past ref */
6687 if (stat_param->num_past_references > 0) {
6688 surface_id = stat_param->past_references[0].picture_id;
6689 assert(surface_id != VA_INVALID_ID);
6690 obj_surface = SURFACE(surface_id);
6693 i965_add_adv_gpe_surface(ctx, gpe_context,
6695 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6699 /* vme future ref */
6700 /* Only supports one future ref */
6701 if (stat_param->num_future_references > 0) {
6702 surface_id = stat_param->future_references[0].picture_id;
6703 assert(surface_id != VA_INVALID_ID);
6704 obj_surface = SURFACE(surface_id);
6707 i965_add_adv_gpe_surface(ctx, gpe_context,
6709 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
6711 surface_id = stat_param->future_references[0].picture_id;
6712 assert(surface_id != VA_INVALID_ID);
6713 obj_surface = SURFACE(surface_id);
6716 i965_add_adv_gpe_surface(ctx, gpe_context,
6718 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
/*
 * Run the FEI PreEnc "PreProc" media kernel for the current frame.
 *
 * Flow: initialize/reset the PreProc GPE context, program CURBE and the
 * binding table surfaces via the generic-context callbacks, upload the
 * FTQ LUT when per-MB QP is enabled, then dispatch a media-object walker
 * covering every macroblock of the frame (no inter-MB dependency).
 *
 * Returns VA_STATUS_SUCCESS unconditionally.
 */
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727                                struct encode_state *encode_state,
6728                                struct intel_encoder_context *encoder_context)
    /* Unpack the layered encoder contexts hanging off encoder_context. */
6730     struct i965_driver_data *i965 = i965_driver_data(ctx);
6731     struct i965_gpe_table *gpe = &i965->gpe_table;
6732     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738     struct i965_gpe_context *gpe_context;
6739     struct gpe_media_object_walker_parameter media_object_walker_param;
6740     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741     int media_function = INTEL_MEDIA_STATE_PREPROC;
6742     struct i965_gpe_resource *gpe_resource = NULL;
6743     unsigned int * data = NULL;
6744     unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
    /* PreProc uses a single GPE context (no per-quality kernel variants). */
6746     gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747     gpe->context_init(ctx, gpe_context);
6748     gpe->reset_binding_table(ctx, gpe_context);
    /* CURBE and surface programming are delegated to per-gen callbacks. */
6751     generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6754     generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6756     gpe->setup_interface_data(ctx, gpe_context);
6758     /* Set up FtqLut Buffer if there is QP change within a frame */
6759     if (stat_param_h264->mb_qp) {
6760         gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761         assert(gpe_resource);
6762         data = i965_map_gpe_resource(gpe_resource);
        /* Copies 16 DWords per QP for all 52 QP values into the const buffer.
         * NOTE(review): no matching i965_unmap_gpe_resource() is visible here;
         * presumably it sits on a line dropped by extraction — confirm. */
6764         memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
    /* One walker thread per MB; no_dependency=1 means no scoreboard waits. */
6767     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770     kernel_walker_param.no_dependency = 1;
6772     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6774     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6777                                             &media_object_walker_param);
6779     return VA_STATUS_SUCCESS;
/*
 * Program the CURBE (constant data) for the Gen8 MbEnc kernel.
 *
 * Selects a per-frame-type init template, then overrides individual
 * DWord fields from the current picture/slice parameters, preset tables
 * and encoder state.  Two modes: normal MbEnc, or the "I-frame distortion"
 * pass (mbenc_i_frame_dist_in_use) which runs on the 4x-downscaled surface.
 * The trailing dw65..dw86 fields are binding-table indices (BTIs) that must
 * match the surface-send order used by the Gen8 MbEnc kernel.
 */
6784 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
6785                          struct encode_state *encode_state,
6786                          struct i965_gpe_context *gpe_context,
6787                          struct intel_encoder_context *encoder_context,
6790     struct i965_driver_data *i965 = i965_driver_data(ctx);
6791     gen8_avc_mbenc_curbe_data *cmd;
6792     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6793     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6794     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6796     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6797     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6798     VASurfaceID surface_id;
6799     struct object_surface *obj_surface;
6801     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
6802     unsigned char qp = 0;
6803     unsigned char me_method = 0;
6804     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
6805     unsigned int table_idx = 0;
6806     unsigned int curbe_size = 0;
6808     unsigned int preset = generic_state->preset;
    /* Map the CURBE and load the init template matching the frame type
     * (or the I-frame-distortion template for the distortion-only pass). */
6809     if (IS_GEN8(i965->intel.device_info)) {
6810         cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6813         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6814         memset(cmd, 0, curbe_size);
6816         if (mbenc_i_frame_dist_in_use) {
6817             memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6819             switch (generic_state->frame_type) {
6821                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6824                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6827                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
    /* Preset-driven search controls; QP is the effective slice QP. */
6839     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
6840     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6842     cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6843     cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6844     cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6845     cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6847     cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6848     cmd->dw38.max_len_sp = 0;
    /* Frame (progressive) access only for src and ref. */
6850     cmd->dw3.src_access = 0;
6851     cmd->dw3.ref_access = 0;
    /* FTQ (fast-to-quiet skip) enable: per-frame-type bit of the preset LUT,
     * never enabled for I frames. */
6853     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6854         //disable ftq_override by now.
6855         if (avc_state->ftq_override) {
6856             cmd->dw3.ftq_enable = avc_state->ftq_enable;
6859             if (generic_state->frame_type == SLICE_TYPE_P) {
6860                 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6863                 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6867         cmd->dw3.ftq_enable = 0;
6870     if (avc_state->disable_sub_mb_partion)
6871         cmd->dw3.sub_mb_part_mask = 0x7;
    /* Picture geometry: 4x-downscaled for the distortion pass, full-res
     * otherwise; slice height drives the per-slice walker extent. */
6873     if (mbenc_i_frame_dist_in_use) {
6874         cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6875         cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6876         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6877         cmd->dw6.batch_buffer_end = 0;
6878         cmd->dw31.intra_compute_type = 1;
6880         cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6881         cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6882         cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
    /* Mode/MV cost table: 8 DWords (dw8..dw15) indexed by slice type and QP. */
6885     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
6886     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6887     } else if (avc_state->skip_bias_adjustment_enable) {
6888         /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6889         // No need to check for P picture as the flag is only enabled for P picture */
6890         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
    /* VME search path: 16 DWords (dw16..dw31) from the B/P table per me_method. */
6893     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6894     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6896     cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6897     cmd->dw4.field_parity_flag = 0;//bottom field
6898     cmd->dw4.enable_cur_fld_idr = 0;//field realted
6899     cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6900     cmd->dw4.hme_enable = generic_state->hme_enabled;
6901     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6902     cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
6904     cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6905     cmd->dw7.src_field_polarity = 0;//field related
6907     /*ftq_skip_threshold_lut set,dw14 /15*/
6909     /*r5 disable NonFTQSkipThresholdLUT*/
    /* Skip-value LUT keyed by block-based-skip and 8x8-transform flags. */
6910     if (generic_state->frame_type == SLICE_TYPE_P) {
6911         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6912     } else if (generic_state->frame_type == SLICE_TYPE_B) {
6913         cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6916     cmd->dw13.qp_prime_y = qp;
6917     cmd->dw13.qp_prime_cb = qp;
6918     cmd->dw13.qp_prime_cr = qp;
6919     cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
    /* Multi-prediction: 128 disables a list, 1 enables it (preset policy). */
6921     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6922         switch (gen9_avc_multi_pred[preset]) {
6924             cmd->dw32.mult_pred_l0_disable = 128;
6925             cmd->dw32.mult_pred_l1_disable = 128;
6928             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6929             cmd->dw32.mult_pred_l1_disable = 128;
6932             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6933             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6936             cmd->dw32.mult_pred_l0_disable = 1;
6937             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6942         cmd->dw32.mult_pred_l0_disable = 128;
6943         cmd->dw32.mult_pred_l1_disable = 128;
    /* B-frame-only fields (frame coding, so field parity is forced 0).
     * NOTE(review): list1_ref_id0_frm_field_parity is assigned twice with the
     * same value — harmless, possibly a leftover from a field-coding variant. */
6946     if (generic_state->frame_type == SLICE_TYPE_B) {
6947         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
6948         cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6949         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6952     cmd->dw34.b_original_bff = 0; //frame only
6953     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6954     cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6955     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6956     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6957     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6958     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6960     if (cmd->dw34.force_non_skip_check) {
6961         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6964     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6965     cmd->dw38.ref_threshold = 400;
6966     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6967     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
    /* The distortion-only pass zeroes QP and intra penalties. */
6969     if (mbenc_i_frame_dist_in_use) {
6970         cmd->dw13.qp_prime_y = 0;
6971         cmd->dw13.qp_prime_cb = 0;
6972         cmd->dw13.qp_prime_cr = 0;
6973         cmd->dw33.intra_16x16_nondc_penalty = 0;
6974         cmd->dw33.intra_8x8_nondc_penalty = 0;
6975         cmd->dw33.intra_4x4_nondc_penalty = 0;
    /* Per-reference actual QP values (list0 refs 0..7, list1 refs 0..1). */
6977     if (cmd->dw4.use_actual_ref_qp_value) {
6978         cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6979         cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6980         cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6981         cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6982         cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6983         cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6984         cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6985         cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6986         cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6987         cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6990     table_idx = slice_type_kernel[generic_state->frame_type];
6991     cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
    /* Per-slice-type VME tuning: I disables skip/HME-combine, P/B set search
     * window, MV range and bias adjustments. */
6992     if (generic_state->frame_type == SLICE_TYPE_I) {
6993         cmd->dw0.skip_mode_enable = 0;
6994         cmd->dw37.skip_mode_enable = 0;
6995         cmd->dw36.hme_combine_overlap = 0;
6996         cmd->dw47.intra_cost_sf = 16;
6997         cmd->dw34.enable_direct_bias_adjustment = 0;
6998         cmd->dw34.enable_global_motion_bias_adjustment = 0;
7000     } else if (generic_state->frame_type == SLICE_TYPE_P) {
7001         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7002         cmd->dw3.bme_disable_fbr = 1;
7003         cmd->dw5.ref_width = gen9_avc_search_x[preset];
7004         cmd->dw5.ref_height = gen9_avc_search_y[preset];
7005         cmd->dw7.non_skip_zmv_added = 1;
7006         cmd->dw7.non_skip_mode_added = 1;
7007         cmd->dw7.skip_center_mask = 1;
7008         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7009         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
7010         cmd->dw36.hme_combine_overlap = 1;
7011         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7012         cmd->dw39.ref_width = gen9_avc_search_x[preset];
7013         cmd->dw39.ref_height = gen9_avc_search_y[preset];
7014         cmd->dw34.enable_direct_bias_adjustment = 0;
7015         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7016         if (avc_state->global_motion_bias_adjustment_enable)
7017             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    /* B-slice branch: bidirectional search, distance scale factors for the
     * temporal direct mode, and the first list1 reference's term check. */
7019         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7020         cmd->dw1.bi_weight = avc_state->bi_weight;
7021         cmd->dw3.search_ctrl = 7;
7022         cmd->dw3.skip_type = 1;
7023         cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
7024         cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
7025         cmd->dw7.skip_center_mask = 0xff;
7026         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7027         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7028         cmd->dw36.hme_combine_overlap = 1;
7029         surface_id = slice_param->RefPicList1[0].picture_id;
7030         obj_surface = SURFACE(surface_id);
7032             WARN_ONCE("Invalid backward reference frame\n");
7035         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
7036         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7037         cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
7038         cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
7039         cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
7040         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
7041         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
7042         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
7043         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
7044         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
7045         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
7046         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
7047         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
7048         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
7049         if (cmd->dw34.enable_direct_bias_adjustment) {
7050             cmd->dw7.non_skip_zmv_added = 1;
7051             cmd->dw7.non_skip_mode_added = 1;
7054         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7055         if (avc_state->global_motion_bias_adjustment_enable)
7056             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    /* Propagate the template's block-based-skip choice back into state. */
7058     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
    /* Rolling intra refresh: disabled under BRC; otherwise pass the refresh
     * unit without the usual -1 adjustment so refresh rows/columns overlap
     * by one MB from P frame to P frame. */
7060     if (avc_state->rolling_intra_refresh_enable) {
7061         /*by now disable it*/
7062         if (generic_state->brc_enabled) {
7063             cmd->dw4.enable_intra_refresh = false;
7064             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7065             cmd->dw48.widi_intra_refresh_mbx = 0;
7066             cmd->dw58.widi_intra_refresh_mby = 0;
7068             cmd->dw4.enable_intra_refresh = true;
7069             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7071             cmd->dw32.mult_pred_l0_disable = 128;
7072             /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
7073             across one P frame to another P frame, as needed by the RollingI algo */
7074             cmd->dw48.widi_intra_refresh_mbx = 0;
7075             cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
7076             cmd->dw48.widi_intra_refresh_qp_delta = 0;
7079         cmd->dw34.widi_intra_refresh_en = 0;
    /* Up to four ROI rectangles (dw49..dw56); delta-QPs only apply in CQP
     * mode (BRC handles ROI QP itself). */
7082     /*roi set disable by now. 49-56*/
7083     if (curbe_param->roi_enabled) {
7084         cmd->dw49.roi_1_x_left = generic_state->roi[0].left;
7085         cmd->dw49.roi_1_y_top = generic_state->roi[0].top;
7086         cmd->dw50.roi_1_x_right = generic_state->roi[0].right;
7087         cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
7089         cmd->dw51.roi_2_x_left = generic_state->roi[1].left;
7090         cmd->dw51.roi_2_y_top = generic_state->roi[1].top;
7091         cmd->dw52.roi_2_x_right = generic_state->roi[1].right;
7092         cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
7094         cmd->dw53.roi_3_x_left = generic_state->roi[2].left;
7095         cmd->dw53.roi_3_y_top = generic_state->roi[2].top;
7096         cmd->dw54.roi_3_x_right = generic_state->roi[2].right;
7097         cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
7099         cmd->dw55.roi_4_x_left = generic_state->roi[3].left;
7100         cmd->dw55.roi_4_y_top = generic_state->roi[3].top;
7101         cmd->dw56.roi_4_x_right = generic_state->roi[3].right;
7102         cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
7104         cmd->dw36.enable_cabac_work_around = 0;
7106         if (!generic_state->brc_enabled) {
            /* Clamp each ROI delta-QP so qp+delta stays within [0, AVC_QP_MAX]. */
7108             tmp = generic_state->roi[0].value;
7109             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7110             cmd->dw57.roi_1_dqp_prime_y = tmp;
7111             tmp = generic_state->roi[1].value;
7112             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7113             cmd->dw57.roi_2_dqp_prime_y = tmp;
7114             tmp = generic_state->roi[2].value;
7115             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7116             cmd->dw57.roi_3_dqp_prime_y = tmp;
7117             tmp = generic_state->roi[3].value;
7118             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7119             cmd->dw57.roi_4_dqp_prime_y = tmp;
7121             cmd->dw34.roi_enable_flag = 0;
    /* Binding-table indices — must stay in sync with the surface-send order. */
7125     cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
7126     cmd->dw66.mv_data_surf_index = GEN8_AVC_MBENC_IND_MV_DATA_CM;
7127     cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
7128     cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
7129     cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
7130     cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
7131     cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
7132     cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
7133     cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
7134     cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
7135     cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
7136     cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
7137     cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
7138     cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
7139     cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
7140     cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
7141     cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
7142     cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
7143     cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
7144     cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
7145     cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
7146     cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
7148     i965_gpe_context_unmap_curbe(gpe_context);
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155 struct encode_state *encode_state,
7156 struct i965_gpe_context *gpe_context,
7157 struct intel_encoder_context *encoder_context,
7160 gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161 struct scaling_param *surface_param = (struct scaling_param *)param;
7163 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7168 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7170 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
7171 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7173 curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174 curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7176 curbe_cmd->dw5.flatness_threshold = 0;
7177 if (surface_param->enable_mb_flatness_check) {
7178 curbe_cmd->dw5.flatness_threshold = 128;
7179 curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7182 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7186 if (curbe_cmd->dw6.enable_mb_variance_output ||
7187 curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188 curbe_cmd->dw10.mbv_proc_states_bti_top_field = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189 curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7192 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the CURBE for the Gen8 HME (hierarchical motion estimation) kernel.
 *
 * One invocation per HME level (4x/16x/32x, chosen by curbe_param->hme_type):
 * the 32x level seeds the 16x level, which seeds the 4x level; only the 4x
 * level writes distortion output.  Loads a template (gen8_avc_me_curbe_init_data)
 * and overrides geometry, QP, search-path and binding-table fields.
 */
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198                       struct encode_state *encode_state,
7199                       struct i965_gpe_context *gpe_context,
7200                       struct intel_encoder_context *encoder_context,
7203     gen8_avc_me_curbe_data *curbe_cmd;
7204     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7208     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7210     struct me_param * curbe_param = (struct me_param *)param ;
7211     unsigned char use_mv_from_prev_step = 0;
7212     unsigned char write_distortions = 0;
7213     unsigned char qp_prime_y = 0;
7214     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215     unsigned char seach_table_idx = 0;
7216     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218     unsigned int scale_factor = 0;
7220     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /* Per-level policy: a level consumes MVs from the next-coarser level only
     * when that level is enabled; distortions are written at 4x only.
     * NOTE(review): the scale_factor assignments (presumably 4/16/32 per case)
     * sit on lines dropped by extraction — confirm against upstream. */
7221     switch (curbe_param->hme_type) {
7222     case INTEL_ENC_HME_4x : {
7223         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224         write_distortions = 1;
7225         mv_shift_factor = 2;
7227         prev_mv_read_pos_factor = 0;
7230     case INTEL_ENC_HME_16x : {
7231         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232         write_distortions = 0;
7233         mv_shift_factor = 2;
7235         prev_mv_read_pos_factor = 1;
7238     case INTEL_ENC_HME_32x : {
7239         use_mv_from_prev_step = 0;
7240         write_distortions = 0;
7241         mv_shift_factor = 1;
7243         prev_mv_read_pos_factor = 0;
7250     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
    /* Downscaled picture size in MBs, rounded up to a 16-pixel boundary. */
7255     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7258     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7260     curbe_cmd->dw3.sub_pel_mode = 3;
7261     if (avc_state->field_scaling_output_interleaved) {
7262         /*frame set to zero,field specified*/
7263         curbe_cmd->dw3.src_access = 0;
7264         curbe_cmd->dw3.ref_access = 0;
7265         curbe_cmd->dw7.src_field_polarity = 0;
7267     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7271     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272     curbe_cmd->dw6.write_distortions = write_distortions;
7273     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
    /* B frames use the bidirectional search path/table and equal bi-weight. */
7276     if (generic_state->frame_type == SLICE_TYPE_B) {
7277         curbe_cmd->dw1.bi_weight = 32;
7278         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279         me_method = gen9_avc_b_me_method[generic_state->preset];
7280         seach_table_idx = 1;
7283     if (generic_state->frame_type == SLICE_TYPE_P ||
7284         generic_state->frame_type == SLICE_TYPE_B)
7285         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7287     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
    /* 14 DWords of VME search path starting at dw16. */
7290     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
    /* Binding-table indices; the MV input comes from the next-coarser level. */
7292     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298     curbe_cmd->dw38.reserved = 0;
7300     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the CURBE for the Gen8 per-frame BRC update kernel.
 *
 * Advances the virtual-buffer fullness model in generic_state (side effect),
 * then writes target size, frame type, skip-frame accounting, min/max QP
 * limits and (for AVBR) the growth/ratio thresholds into the curbe.
 * Bails out early if the reconstructed surface has no private data.
 */
7305 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7306                                     struct encode_state *encode_state,
7307                                     struct i965_gpe_context *gpe_context,
7308                                     struct intel_encoder_context *encoder_context,
7311     gen8_avc_frame_brc_update_curbe_data *cmd;
7312     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7313     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7314     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7315     struct object_surface *obj_surface;
7316     struct gen9_surface_avc *avc_priv_surface;
7317     struct avc_param common_param;
7319     obj_surface = encode_state->reconstructed_object;
7321     if (!obj_surface || !obj_surface->private_data)
7323     avc_priv_surface = obj_surface->private_data;
7325     cmd = i965_gpe_context_map_curbe(gpe_context);
7330     memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
    /* Wrap the modeled buffer fullness once it exceeds the buffer size and
     * tell the kernel via target_size_flag. */
7332     cmd->dw5.target_size_flag = 0 ;
7333     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7335         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7336         cmd->dw5.target_size_flag = 1 ;
    /* Account for skipped frames: report them and credit their bit budget.
     * (Field name "skip_frame_enbale" is a pre-existing typo in the state
     * struct declared elsewhere; it cannot be fixed here.) */
7339     if (generic_state->skip_frame_enbale) {
7340         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7341         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
7343         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7346     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7347     cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header size is tracked in bytes; the kernel expects bits (<< 3). */
7348     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7349     cmd->dw5.cur_frame_type = generic_state->frame_type ;
7350     cmd->dw5.brc_flag = 0 ;
7351     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7353     if (avc_state->multi_pre_enable) {
7354         cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7355         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7358     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-frame-type QP clamps; zeroed (unconstrained) when disabled. */
7359     if (avc_state->min_max_qp_enable) {
7360         switch (generic_state->frame_type) {
7362             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7363             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7366             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7367             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7370             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7371             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7375         cmd->dw6.minimum_qp = 0 ;
7376         cmd->dw6.maximum_qp = 0 ;
    /* Credit one frame's worth of input bits to the buffer model. */
7379     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
    /* AVBR only: growth-adjustment frame checkpoints (scaled by convergence)
     * and rate-ratio thresholds (widened by the accuracy setting). */
7381     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7382         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
7383         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
7384         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
7385         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
7386         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7387         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7388         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7389         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7390         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7391         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
    /* NOTE(review): common_param is filled but no consumer is visible in this
     * view — presumably passed to a helper on a line dropped by extraction. */
7395     memset(&common_param, 0, sizeof(common_param));
7396     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7397     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7398     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7399     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7400     common_param.frames_per_100s = generic_state->frames_per_100s;
7401     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7402     common_param.target_bit_rate = generic_state->target_bit_rate;
7404     i965_gpe_context_unmap_curbe(gpe_context);
7410 Kernel-related functions: initialization, teardown, etc.
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414 struct generic_encoder_context *generic_context,
7415 struct gen_avc_scaling_context *kernel_context,
7418 struct i965_driver_data *i965 = i965_driver_data(ctx);
7419 struct i965_gpe_table *gpe = &i965->gpe_table;
7420 struct i965_gpe_context *gpe_context = NULL;
7421 struct encoder_kernel_parameter kernel_param ;
7422 struct encoder_scoreboard_parameter scoreboard_param;
7423 struct i965_kernel common_kernel;
7425 memset(&kernel_param, 0, sizeof(kernel_param));
7426 if (IS_SKL(i965->intel.device_info) ||
7427 IS_BXT(i965->intel.device_info)) {
7428 if (!preenc_enabled) {
7429 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7432 /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7436 } else if (IS_KBL(i965->intel.device_info) ||
7437 IS_GEN10(i965->intel.device_info) ||
7438 IS_GLK(i965->intel.device_info)) {
7439 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441 } else if (IS_GEN8(i965->intel.device_info)) {
7442 kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443 kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7447 /* 4x scaling kernel*/
7448 kernel_param.sampler_size = 0;
7450 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451 scoreboard_param.mask = 0xFF;
7452 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454 scoreboard_param.walkpat_flag = 0;
7456 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7460 memset(&common_kernel, 0, sizeof(common_kernel));
7462 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463 generic_context->enc_kernel_size,
7464 INTEL_GENERIC_ENC_SCALING4X,
7468 gpe->load_kernels(ctx,
7473 /* PreEnc using only the 4X scaling */
7477 /*2x scaling kernel*/
7478 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479 kernel_param.inline_data_size = 0;
7480 kernel_param.sampler_size = 0;
7482 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7486 memset(&common_kernel, 0, sizeof(common_kernel));
7488 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489 generic_context->enc_kernel_size,
7490 INTEL_GENERIC_ENC_SCALING2X,
7494 gpe->load_kernels(ctx,
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503 struct generic_encoder_context *generic_context,
7504 struct gen_avc_me_context *kernel_context,
7507 struct i965_driver_data *i965 = i965_driver_data(ctx);
7508 struct i965_gpe_table *gpe = &i965->gpe_table;
7509 struct i965_gpe_context *gpe_context = NULL;
7510 struct encoder_kernel_parameter kernel_param ;
7511 struct encoder_scoreboard_parameter scoreboard_param;
7512 struct i965_kernel common_kernel;
7514 unsigned int curbe_size = 0;
7516 if (IS_GEN8(i965->intel.device_info)) {
7517 curbe_size = sizeof(gen8_avc_me_curbe_data);
7519 if (!preenc_enabled)
7520 curbe_size = sizeof(gen9_avc_me_curbe_data);
7522 curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7525 kernel_param.curbe_size = curbe_size;
7526 kernel_param.inline_data_size = 0;
7527 kernel_param.sampler_size = 0;
7529 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530 scoreboard_param.mask = 0xFF;
7531 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533 scoreboard_param.walkpat_flag = 0;
7535 /* There is two hme kernel, one for P and other for B frame */
7536 for (i = 0; i < 2; i++) {
7537 gpe_context = &kernel_context->gpe_contexts[i];
7538 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7541 memset(&common_kernel, 0, sizeof(common_kernel));
7543 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544 generic_context->enc_kernel_size,
7545 INTEL_GENERIC_ENC_ME,
7549 gpe->load_kernels(ctx,
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559 struct generic_encoder_context *generic_context,
7560 struct gen_avc_preproc_context *kernel_context)
7562 struct i965_driver_data *i965 = i965_driver_data(ctx);
7563 struct i965_gpe_table *gpe = &i965->gpe_table;
7564 struct i965_gpe_context *gpe_context = NULL;
7565 struct encoder_kernel_parameter kernel_param ;
7566 struct encoder_scoreboard_parameter scoreboard_param;
7567 struct i965_kernel common_kernel;
7569 kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570 kernel_param.inline_data_size = 0;
7571 kernel_param.sampler_size = 0;
7573 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574 scoreboard_param.mask = 0xFF;
7575 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577 scoreboard_param.walkpat_flag = 0;
7579 gpe_context = &kernel_context->gpe_contexts;
7580 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7583 memset(&common_kernel, 0, sizeof(common_kernel));
7585 intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586 generic_context->enc_kernel_size,
7587 INTEL_GENERIC_ENC_PREPROC,
7591 gpe->load_kernels(ctx,
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600 struct generic_encoder_context *generic_context,
7601 struct gen_avc_mbenc_context *kernel_context,
7604 struct i965_driver_data *i965 = i965_driver_data(ctx);
7605 struct i965_gpe_table *gpe = &i965->gpe_table;
7606 struct i965_gpe_context *gpe_context = NULL;
7607 struct encoder_kernel_parameter kernel_param ;
7608 struct encoder_scoreboard_parameter scoreboard_param;
7609 struct i965_kernel common_kernel;
7611 unsigned int curbe_size = 0;
7612 unsigned int num_mbenc_kernels = 0;
7614 if (IS_SKL(i965->intel.device_info) ||
7615 IS_BXT(i965->intel.device_info)) {
7617 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7620 curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621 num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7623 } else if (IS_KBL(i965->intel.device_info) ||
7624 IS_GEN10(i965->intel.device_info) ||
7625 IS_GLK(i965->intel.device_info)) {
7626 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628 } else if (IS_GEN8(i965->intel.device_info)) {
7629 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7633 assert(curbe_size > 0);
7634 kernel_param.curbe_size = curbe_size;
7635 kernel_param.inline_data_size = 0;
7636 kernel_param.sampler_size = 0;
7638 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639 scoreboard_param.mask = 0xFF;
7640 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642 scoreboard_param.walkpat_flag = 0;
7644 for (i = 0; i < num_mbenc_kernels ; i++) {
7645 gpe_context = &kernel_context->gpe_contexts[i];
7646 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7649 memset(&common_kernel, 0, sizeof(common_kernel));
7651 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652 generic_context->enc_kernel_size,
7653 INTEL_GENERIC_ENC_MBENC,
7657 gpe->load_kernels(ctx,
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667 struct generic_encoder_context *generic_context,
7668 struct gen_avc_brc_context *kernel_context)
7670 struct i965_driver_data *i965 = i965_driver_data(ctx);
7671 struct i965_gpe_table *gpe = &i965->gpe_table;
7672 struct i965_gpe_context *gpe_context = NULL;
7673 struct encoder_kernel_parameter kernel_param ;
7674 struct encoder_scoreboard_parameter scoreboard_param;
7675 struct i965_kernel common_kernel;
7676 int num_brc_init_kernels = 0;
7679 if (IS_GEN8(i965->intel.device_info)) {
7680 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7682 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
7685 const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687 (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689 (sizeof(gen8_avc_mbenc_curbe_data)),
7692 const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696 ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7698 (sizeof(gen9_avc_mb_brc_curbe_data))
7701 kernel_param.inline_data_size = 0;
7702 kernel_param.sampler_size = 0;
7704 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705 scoreboard_param.mask = 0xFF;
7706 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708 scoreboard_param.walkpat_flag = 0;
7710 for (i = 0; i < num_brc_init_kernels; i++) {
7711 if (IS_GEN8(i965->intel.device_info)) {
7712 kernel_param.curbe_size = gen8_brc_curbe_size[i];
7714 kernel_param.curbe_size = gen9_brc_curbe_size[i];
7716 gpe_context = &kernel_context->gpe_contexts[i];
7717 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7720 memset(&common_kernel, 0, sizeof(common_kernel));
7722 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723 generic_context->enc_kernel_size,
7724 INTEL_GENERIC_ENC_BRC,
7728 gpe->load_kernels(ctx,
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738 struct generic_encoder_context *generic_context,
7739 struct gen_avc_wp_context *kernel_context)
7741 struct i965_driver_data *i965 = i965_driver_data(ctx);
7742 struct i965_gpe_table *gpe = &i965->gpe_table;
7743 struct i965_gpe_context *gpe_context = NULL;
7744 struct encoder_kernel_parameter kernel_param ;
7745 struct encoder_scoreboard_parameter scoreboard_param;
7746 struct i965_kernel common_kernel;
7748 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749 kernel_param.inline_data_size = 0;
7750 kernel_param.sampler_size = 0;
7752 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753 scoreboard_param.mask = 0xFF;
7754 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756 scoreboard_param.walkpat_flag = 0;
7758 gpe_context = &kernel_context->gpe_contexts;
7759 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7762 memset(&common_kernel, 0, sizeof(common_kernel));
7764 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765 generic_context->enc_kernel_size,
7766 INTEL_GENERIC_ENC_WP,
7770 gpe->load_kernels(ctx,
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779 struct generic_encoder_context *generic_context,
7780 struct gen_avc_sfd_context *kernel_context)
7782 struct i965_driver_data *i965 = i965_driver_data(ctx);
7783 struct i965_gpe_table *gpe = &i965->gpe_table;
7784 struct i965_gpe_context *gpe_context = NULL;
7785 struct encoder_kernel_parameter kernel_param ;
7786 struct encoder_scoreboard_parameter scoreboard_param;
7787 struct i965_kernel common_kernel;
7789 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790 kernel_param.inline_data_size = 0;
7791 kernel_param.sampler_size = 0;
7793 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794 scoreboard_param.mask = 0xFF;
7795 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797 scoreboard_param.walkpat_flag = 0;
7799 gpe_context = &kernel_context->gpe_contexts;
7800 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7803 memset(&common_kernel, 0, sizeof(common_kernel));
7805 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806 generic_context->enc_kernel_size,
7807 INTEL_GENERIC_ENC_SFD,
7811 gpe->load_kernels(ctx,
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7822 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824 struct i965_gpe_table *gpe = &i965->gpe_table;
7828 gen9_avc_free_resources(vme_context);
7830 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7833 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7836 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7839 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7842 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7844 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7846 gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7856 struct encode_state *encode_state,
7857 struct intel_encoder_context *encoder_context)
7859 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862 VAEncSequenceParameterBufferH264 *seq_param;
7863 VAEncSliceParameterBufferH264 *slice_param;
7864 VAEncMiscParameterBuffer *fei_misc_param;
7865 int i, j, slice_index;
7866 unsigned int preset = generic_state->preset;
7867 unsigned int fei_enabled = encoder_context->fei_enabled;
7869 /* seq/pic/slice parameter setting */
7870 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7873 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
7877 encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878 fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879 avc_state->fei_framectl_param =
7880 (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7883 avc_state->slice_num = 0;
7885 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888 avc_state->slice_param[slice_index] = slice_param;
7891 avc_state->slice_num++;
7895 /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896 seq_param = avc_state->seq_param;
7897 slice_param = avc_state->slice_param[0];
7899 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7901 if (slice_param->slice_type == SLICE_TYPE_I ||
7902 slice_param->slice_type == SLICE_TYPE_SI)
7903 generic_state->frame_type = SLICE_TYPE_I;
7904 else if (slice_param->slice_type == SLICE_TYPE_P)
7905 generic_state->frame_type = SLICE_TYPE_P;
7906 else if (slice_param->slice_type == SLICE_TYPE_B)
7907 generic_state->frame_type = SLICE_TYPE_B;
7908 if (profile == VAProfileH264High)
7909 avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7911 avc_state->transform_8x8_mode_enable = 0;
7914 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918 generic_state->frames_per_100s = 3000; /* 30fps */
7921 generic_state->gop_size = seq_param->intra_period;
7922 generic_state->gop_ref_distance = seq_param->ip_period;
7924 if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925 generic_state->max_bit_rate = generic_state->target_bit_rate;
7926 generic_state->min_bit_rate = generic_state->target_bit_rate;
7929 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7933 generic_state->preset = encoder_context->quality_level;
7934 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935 generic_state->preset = INTEL_PRESET_RT_SPEED;
7937 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7939 if (!generic_state->brc_inited) {
7940 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7947 generic_state->curr_pak_pass = 0;
7948 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7950 if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951 generic_state->internal_rate_mode == VA_RC_VBR)
7952 generic_state->brc_enabled = 1;
7954 generic_state->brc_enabled = 0;
7956 if (generic_state->brc_enabled &&
7957 (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958 !generic_state->vbv_buffer_size_in_bit ||
7959 !generic_state->max_bit_rate ||
7960 !generic_state->target_bit_rate ||
7961 !generic_state->frames_per_100s)) {
7962 WARN_ONCE("Rate control parameter is required for BRC\n");
7963 generic_state->brc_enabled = 0;
7966 if (!generic_state->brc_enabled) {
7967 generic_state->target_bit_rate = 0;
7968 generic_state->max_bit_rate = 0;
7969 generic_state->min_bit_rate = 0;
7970 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971 generic_state->vbv_buffer_size_in_bit = 0;
7972 generic_state->num_pak_passes = 1;
7974 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7978 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7983 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
7986 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7988 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
7991 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7993 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
7996 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7998 if (generic_state->hme_supported) {
7999 generic_state->hme_enabled = 1;
8001 generic_state->hme_enabled = 0;
8004 if (generic_state->b16xme_supported) {
8005 generic_state->b16xme_enabled = 1;
8007 generic_state->b16xme_enabled = 0;
8010 if (generic_state->b32xme_supported) {
8011 generic_state->b32xme_enabled = 1;
8013 generic_state->b32xme_enabled = 0;
8015 /* disable HME/16xME if the size is too small */
8016 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017 generic_state->b32xme_supported = 0;
8018 generic_state->b32xme_enabled = 0;
8019 generic_state->b16xme_supported = 0;
8020 generic_state->b16xme_enabled = 0;
8021 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8024 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025 generic_state->b32xme_supported = 0;
8026 generic_state->b32xme_enabled = 0;
8027 generic_state->b16xme_supported = 0;
8028 generic_state->b16xme_enabled = 0;
8029 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8033 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034 generic_state->b32xme_supported = 0;
8035 generic_state->b32xme_enabled = 0;
8036 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8039 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040 generic_state->b32xme_supported = 0;
8041 generic_state->b32xme_enabled = 0;
8042 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8046 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8050 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8058 gen9_avc_encode_check_parameter(VADriverContextP ctx,
8059 struct encode_state *encode_state,
8060 struct intel_encoder_context *encoder_context)
8062 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8063 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8064 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8065 unsigned int rate_control_mode = encoder_context->rate_control_mode;
8066 unsigned int preset = generic_state->preset;
8067 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
8068 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8070 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8072 generic_state->avbr_curracy = 30;
8073 generic_state->avbr_convergence = 150;
8075 switch (rate_control_mode & 0x7f) {
8077 generic_state->internal_rate_mode = VA_RC_CBR;
8081 generic_state->internal_rate_mode = VA_RC_VBR;
8086 generic_state->internal_rate_mode = VA_RC_CQP;
8090 if (rate_control_mode != VA_RC_NONE &&
8091 rate_control_mode != VA_RC_CQP) {
8092 generic_state->brc_enabled = 1;
8093 generic_state->brc_distortion_buffer_supported = 1;
8094 generic_state->brc_constant_buffer_supported = 1;
8095 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
8098 /*check brc parameter*/
8099 if (generic_state->brc_enabled) {
8100 avc_state->mb_qp_data_enable = 0;
8103 /*set the brc init and reset accordingly*/
8104 if (generic_state->brc_need_reset &&
8105 (generic_state->brc_distortion_buffer_supported == 0 ||
8106 rate_control_mode == VA_RC_CQP)) {
8107 generic_state->brc_need_reset = 0;// not support by CQP
8109 if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
8110 avc_state->sfd_enable = 0;
8112 avc_state->sfd_enable = 1;
8115 if (generic_state->frames_per_window_size == 0) {
8116 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8117 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
8118 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8121 if (generic_state->brc_enabled) {
8122 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
8123 if (avc_state->min_max_qp_enable) {
8124 generic_state->num_pak_passes = 1;
8126 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
8127 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
8129 generic_state->num_pak_passes = 1;// CQP only one pass
8132 avc_state->mbenc_i_frame_dist_in_use = 0;
8133 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
8135 /*ROI must enable mbbrc.*/
8138 if (avc_state->caf_supported) {
8139 switch (generic_state->frame_type) {
8141 avc_state->caf_enable = 0;
8144 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
8147 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
8151 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
8152 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
8153 avc_state->caf_enable = 0;
8157 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
8159 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
8160 if (avc_state->flatness_check_supported) {
8161 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
8163 avc_state->flatness_check_enable = 0;
8166 /* check mb_status_supported/enbale*/
8167 if (avc_state->adaptive_transform_decision_enable) {
8168 avc_state->mb_status_enable = 1;
8170 avc_state->mb_status_enable = 0;
8172 /*slice check,all the slices use the same slice height except the last slice*/
8173 avc_state->arbitrary_num_mbs_in_slice = 0;
8174 for (i = 0; i < avc_state->slice_num; i++) {
8175 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
8176 avc_state->arbitrary_num_mbs_in_slice = 1;
8177 avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
8179 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
8183 if (avc_state->slice_num > 1)
8184 avc_state->arbitrary_num_mbs_in_slice = 1;
8186 if (generic_state->frame_type == SLICE_TYPE_I) {
8187 generic_state->hme_enabled = 0;
8188 generic_state->b16xme_enabled = 0;
8189 generic_state->b32xme_enabled = 0;
8192 if (generic_state->frame_type == SLICE_TYPE_B) {
8193 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
8194 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
8197 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
8198 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
8199 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
8201 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
8202 avc_state->tq_enable = 1;
8203 avc_state->tq_rounding = 6;
8204 if (generic_state->brc_enabled) {
8205 generic_state->mb_brc_enabled = 1;
8209 //check the inter rounding
8210 avc_state->rounding_value = 0;
8211 avc_state->rounding_inter_p = 255;//default
8212 avc_state->rounding_inter_b = 255; //default
8213 avc_state->rounding_inter_b_ref = 255; //default
8215 if (generic_state->frame_type == SLICE_TYPE_P) {
8216 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
8217 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
8218 if (generic_state->gop_ref_distance == 1)
8219 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
8221 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
8223 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
8227 avc_state->rounding_value = avc_state->rounding_inter_p;
8229 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8230 if (pic_param->pic_fields.bits.reference_pic_flag) {
8231 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
8232 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
8234 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
8236 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
8237 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
8238 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
8240 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
8242 avc_state->rounding_value = avc_state->rounding_inter_b;
8246 return VA_STATUS_SUCCESS;
/*
 * Prepare all per-frame GPE resources before the VME kernels run:
 * wraps the reconstructed surface, input YUV surface, reference
 * surfaces and their direct-MV (DMV) buffers, and the coded/status
 * buffer as GPE resources, then builds the L0/L1 reference index
 * mapping (list_ref_idx) consumed by the kernels.
 *
 * NOTE(review): this listing is an extraction with gaps — the embedded
 * source line numbers are non-contiguous, so several declarations,
 * braces and error-return lines are not visible here.
 */
8250 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
8251 struct encode_state *encode_state,
8252 struct intel_encoder_context *encoder_context)
8255 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8256 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8257 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8258 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8259 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8261 struct object_surface *obj_surface;
8262 struct object_buffer *obj_buffer;
8263 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8264 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8265 struct i965_coded_buffer_segment *coded_buffer_segment;
8267 struct gen9_surface_avc *avc_priv_surface;
8269 struct avc_surface_param surface_param;
8271 unsigned char * pdata;
8273 /* Setup current reconstruct frame */
8274 obj_surface = encode_state->reconstructed_object;
8275 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8277 if (va_status != VA_STATUS_SUCCESS)
8280 memset(&surface_param, 0, sizeof(surface_param));
8281 surface_param.frame_width = generic_state->frame_width_in_pixel;
8282 surface_param.frame_height = generic_state->frame_height_in_pixel;
8283 va_status = gen9_avc_init_check_surfaces(ctx,
8287 if (va_status != VA_STATUS_SUCCESS)
8290 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
8291 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
/*
 * The last two slots of res_direct_mv_buffersr / top_field_poc are
 * reserved for the *current* frame's top/bottom DMV buffers and POCs;
 * slots [0 .. NUM_MFC_AVC_DMV_BUFFERS-3] hold reference frames (see
 * the loop below).
 */
8292 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
8293 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
8294 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
8295 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
8296 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
8297 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
/* QP for the current frame = pic-level init QP + slice delta. */
8298 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8299 avc_priv_surface->frame_store_id = 0;
8300 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
8301 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
8302 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
8303 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
8304 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
8306 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
8307 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8309 /* input YUV surface*/
8310 obj_surface = encode_state->input_yuv_object;
8311 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8313 if (va_status != VA_STATUS_SUCCESS)
8315 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
8316 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8318 /* Reference surfaces */
/*
 * Re-bind each active reference surface and its two DMV buffers
 * (top/bottom field at slots 2*i and 2*i+1); POCs default to 0 for
 * unused slots.
 */
8319 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
8320 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
8321 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
8322 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
8323 obj_surface = encode_state->reference_objects[i];
8324 avc_state->top_field_poc[2 * i] = 0;
8325 avc_state->top_field_poc[2 * i + 1] = 0;
8327 if (obj_surface && obj_surface->bo) {
8328 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
8330 /* actually it should be handled when it is reconstructed surface*/
8331 va_status = gen9_avc_init_check_surfaces(ctx,
8332 obj_surface, encoder_context,
8334 if (va_status != VA_STATUS_SUCCESS)
8336 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
8337 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
8338 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
8339 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
8340 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
8341 avc_priv_surface->frame_store_id = i;
8347 /* Encoded bitstream ?*/
8348 obj_buffer = encode_state->coded_buf_object;
8349 bo = obj_buffer->buffer_store->bo;
8350 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
8351 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
/* Bitstream data starts after the coded-buffer header segment. */
8352 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
8353 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* The status buffer shares the coded buffer's BO. */
8356 avc_ctx->status_buffer.bo = bo;
8358 /* set the internal flag to 0 to indicate the coded size is unknown */
8360 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
8361 coded_buffer_segment->mapped = 0;
8362 coded_buffer_segment->codec = encoder_context->codec;
8363 coded_buffer_segment->status_support = 1;
8365 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
8366 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
8369 //frame id, it is the ref pic id in the reference_objects list.
8370 avc_state->num_refs[0] = 0;
8371 avc_state->num_refs[1] = 0;
/*
 * Active reference counts come from the picture parameters, optionally
 * overridden per-slice when num_ref_idx_active_override_flag is set.
 */
8372 if (generic_state->frame_type == SLICE_TYPE_P) {
8373 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8375 if (slice_param->num_ref_idx_active_override_flag)
8376 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8377 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8378 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8379 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
8381 if (slice_param->num_ref_idx_active_override_flag) {
8382 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8383 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Reject reference counts larger than the internal index tables. */
8387 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
8388 return VA_STATUS_ERROR_INVALID_VALUE;
8389 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
8390 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 entry to its slot index in reference_objects. */
8392 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
8393 VAPictureH264 *va_pic;
8395 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
8396 avc_state->list_ref_idx[0][i] = 0;
8398 if (i >= avc_state->num_refs[0])
8401 va_pic = &slice_param->RefPicList0[i];
8403 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8404 obj_surface = encode_state->reference_objects[j];
8408 obj_surface->base.id == va_pic->picture_id) {
8410 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8411 avc_state->list_ref_idx[0][i] = j;
/* Same mapping for RefPicList1 (B frames). */
8417 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
8418 VAPictureH264 *va_pic;
8420 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
8421 avc_state->list_ref_idx[1][i] = 0;
8423 if (i >= avc_state->num_refs[1])
8426 va_pic = &slice_param->RefPicList1[i];
8428 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8429 obj_surface = encode_state->reference_objects[j];
8433 obj_surface->base.id == va_pic->picture_id) {
8435 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8436 avc_state->list_ref_idx[1][i] = j;
8443 return VA_STATUS_SUCCESS;
/*
 * Per-frame kernel init hook for the VME path. No work is required
 * here; kept so the pipeline keeps a uniform prepare/init/run/final
 * sequence (see gen9_avc_vme_pipeline).
 */
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448 struct encode_state *encode_state,
8449 struct intel_encoder_context *encoder_context)
8451 return VA_STATUS_SUCCESS;
/*
 * Post-run hook: once all kernels for the frame have been submitted,
 * latch the BRC state so the next frame treats BRC as initialized and
 * clears the pending-reset / curbe-set-in-update flags.
 */
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456 struct encode_state *encode_state,
8457 struct intel_encoder_context *encoder_context)
8460 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8464 /*set this flag when all kernel is finished*/
8465 if (generic_state->brc_enabled) {
8466 generic_state->brc_inited = 1;
8467 generic_state->brc_need_reset = 0;
8468 avc_state->mbenc_curbe_set_in_brc_update = 0;
8470 return VA_STATUS_SUCCESS;
/*
 * Submit the full VME kernel sequence for one frame, in dependency
 * order: BRC init/reset -> downscaling -> hierarchical ME (32x -> 16x
 * -> 4x) -> SFD -> BRC frame/MB update (with optional I-frame
 * distortion MbEnc pass) -> weighted prediction -> final MbEnc.
 *
 * NOTE(review): extraction gaps — some declarations (e.g. sfd_in_use)
 * and closing braces are not visible in this listing.
 */
8474 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
8475 struct encode_state *encode_state,
8476 struct intel_encoder_context *encoder_context)
8478 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8479 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8480 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8481 int fei_enabled = encoder_context->fei_enabled;
8483 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8484 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
8487 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
8488 if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
8489 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
/* Downscale the input for each supported HME level (4x always first). */
8493 if (generic_state->hme_supported) {
8494 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8495 if (generic_state->b16xme_supported) {
8496 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8497 if (generic_state->b32xme_supported) {
8498 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
/* Run ME coarsest-to-finest; each level seeds the next. */
8504 if (generic_state->hme_enabled) {
8505 if (generic_state->b16xme_enabled) {
8506 if (generic_state->b32xme_enabled) {
8507 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
8509 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8511 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8514 /*call SFD kernel after HME in same command buffer*/
8515 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
8516 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
8518 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
8521 /* BRC and MbEnc are included in the same task phase*/
8522 if (generic_state->brc_enabled) {
/* Optional extra MbEnc pass that only produces I-frame distortion. */
8523 if (avc_state->mbenc_i_frame_dist_in_use) {
8524 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
8526 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
8528 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
8529 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
8533 /*weight prediction,disable by now */
8534 avc_state->weighted_ref_l0_enable = 0;
8535 avc_state->weighted_ref_l1_enable = 0;
8536 if (avc_state->weighted_prediction_supported &&
8537 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
8538 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
8539 if (slice_param->luma_weight_l0_flag & 1) {
8540 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
8542 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
8543 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
8546 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
8547 if (slice_param->luma_weight_l1_flag & 1) {
8548 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
8549 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
8550 (slice_param->chroma_weight_l0_flag & 1) ||
8551 (slice_param->chroma_weight_l1_flag & 1))) {
8552 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
/* Final MbEnc pass producing the real MB-level encode decisions. */
8558 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
8560 /*ignore the reset vertical line kernel*/
8562 return VA_STATUS_SUCCESS;
/*
 * Top-level VME pipeline entry: update parameters from the app, check
 * them, allocate resources, then run the prepare/init/run/final kernel
 * sequence. Bails out with the failing VAStatus at each stage.
 */
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8568 struct encode_state *encode_state,
8569 struct intel_encoder_context *encoder_context)
8573 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8575 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576 if (va_status != VA_STATUS_SUCCESS)
8579 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580 if (va_status != VA_STATUS_SUCCESS)
8583 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584 if (va_status != VA_STATUS_SUCCESS)
8587 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588 if (va_status != VA_STATUS_SUCCESS)
8591 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592 if (va_status != VA_STATUS_SUCCESS)
8595 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8597 return VA_STATUS_SUCCESS;
8600 /* Update PreEnc specific parameters */
/*
 * Validate and latch PreEnc (statistics-only) parameters for one frame:
 * derive the frame type from past/future reference counts, compute
 * frame / 4x-downscaled geometry, and bind every app-provided buffer
 * (MV predictor, per-MB QP, MV/statistics outputs, past/future ref
 * statistics) as GPE resources after size-checking each one.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_OPERATION_FAILED when
 * PreEnc is not active, or VA_STATUS_ERROR_INVALID_BUFFER for a
 * missing/undersized buffer.
 *
 * FIX(review): the future-ref stat block previously gated on
 * num_past_references (copy/paste from the past-ref block); it now
 * checks num_future_references.
 */
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8604 struct encode_state *encode_state,
8605 struct intel_encoder_context *encoder_context)
8607 struct i965_driver_data *i965 = i965_driver_data(ctx);
8608 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613 VAStatsStatisticsParameter *stat_param = NULL;
8614 struct object_buffer *obj_buffer = NULL;
8615 struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616 struct buffer_store *buffer_store = NULL;
8617 unsigned int size = 0, i = 0;
8618 unsigned int frame_mb_nums = 0;
8620 if (!encoder_context->preenc_enabled ||
8621 !encode_state->stat_param_ext ||
8622 !encode_state->stat_param_ext->buffer)
8623 return VA_STATUS_ERROR_OPERATION_FAILED;
8625 stat_param_h264 = avc_state->stat_param =
8626 (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627 stat_param = &stat_param_h264->stats_params;
8629 /* Assume the frame type based on number of past/future ref frames */
8630 if (!stat_param->num_past_references && !stat_param->num_future_references)
8631 generic_state->frame_type = SLICE_TYPE_I;
8632 else if (stat_param->num_future_references > 0)
8633 generic_state->frame_type = SLICE_TYPE_B;
8635 generic_state->frame_type = SLICE_TYPE_P;
8637 generic_state->preset = INTEL_PRESET_RT_SPEED;
8638 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8640 /* frame width and height */
8641 generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642 generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643 generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644 generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8646 /* 4x downscaled width and height */
8647 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
8650 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8652 /* reset hme types for preenc */
8653 if (generic_state->frame_type != SLICE_TYPE_I)
8654 generic_state->hme_enabled = 1;
8656 /* ensure frame width is not too small */
8657 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659 generic_state->downscaled_width_4x_in_mb =
8660 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8663 /* ensure frame height is not too small*/
8664 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666 generic_state->downscaled_height_4x_in_mb =
8667 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8670 /********** Ensure buffer object parameters ********/
8671 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8673 /* mv predictor buffer */
8674 if (stat_param_h264->mv_predictor_ctrl) {
8675 if (stat_param->mv_predictor == VA_INVALID_ID)
8677 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678 obj_buffer = BUFFER(stat_param->mv_predictor);
8679 buffer_store = obj_buffer->buffer_store;
8680 if (buffer_store->bo->size < size)
8682 if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8683 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8684 i965_dri_object_to_buffer_gpe_resource(
8685 &avc_ctx->preproc_mv_predictor_buffer,
/* per-MB QP input buffer (only when mb_qp control is set) */
8690 if (stat_param_h264->mb_qp) {
8691 if (stat_param->qp == VA_INVALID_ID)
8693 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8694 obj_buffer = BUFFER(stat_param->qp);
8695 buffer_store = obj_buffer->buffer_store;
8696 if (buffer_store->bo->size < size)
8698 if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8699 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8700 i965_dri_object_to_buffer_gpe_resource(
8701 &avc_ctx->preproc_mb_qp_buffer,
8705 /* locate mv and stat buffer */
8706 if (!stat_param_h264->disable_mv_output ||
8707 !stat_param_h264->disable_statistics_output) {
8709 if (!stat_param->outputs)
/* outputs[] carries the MV and statistics buffers in either order. */
8712 for (i = 0; i < 2 ; i++) {
8713 if (stat_param->outputs[i] != VA_INVALID_ID) {
8714 obj_buffer = BUFFER(stat_param->outputs[i]);
8715 switch (obj_buffer->type) {
8716 case VAStatsMVBufferType:
8717 obj_buffer_mv = obj_buffer;
8719 case VAStatsStatisticsBufferType:
8720 obj_buffer_stat = obj_buffer;
8726 if (!(!stat_param_h264->disable_mv_output &&
8727 !stat_param_h264->disable_statistics_output))
8731 /* mv data output buffer */
8732 if (!stat_param_h264->disable_mv_output) {
8733 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8734 buffer_store = obj_buffer_mv->buffer_store;
8735 if (buffer_store->bo->size < size)
8737 if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8738 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8739 i965_dri_object_to_buffer_gpe_resource(
8740 &avc_ctx->preproc_mv_data_out_buffer,
8743 /* statistics output buffer */
8744 if (!stat_param_h264->disable_statistics_output) {
8745 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8746 buffer_store = obj_buffer_stat->buffer_store;
8747 if (buffer_store->bo->size < size)
8749 if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8750 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8751 i965_dri_object_to_buffer_gpe_resource(
8752 &avc_ctx->preproc_stat_data_out_buffer,
8756 /* past ref stat out buffer */
8757 if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8758 stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8759 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8760 obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8761 buffer_store = obj_buffer->buffer_store;
8762 if (buffer_store->bo->size < size)
8764 if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8765 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8766 i965_dri_object_to_buffer_gpe_resource(
8767 &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8770 /* future ref stat out buffer */
8771 if (stat_param->num_future_references && stat_param->future_ref_stat_buf &&
8772 stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8773 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8774 obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8775 buffer_store = obj_buffer->buffer_store;
8776 if (buffer_store->bo->size < size)
8778 if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8779 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8780 i965_dri_object_to_buffer_gpe_resource(
8781 &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8784 return VA_STATUS_SUCCESS;
8787 return VA_STATUS_ERROR_INVALID_BUFFER;
8790 /* allocate internal resources required for PreEnc */
/*
 * Allocate the driver-internal resources PreEnc needs each frame:
 * 4x MEMV data and distortion surfaces, the BRC distortion surface,
 * the MBBRC const-data (FTQ LUT) buffer, three lazily-created 4x
 * downscaled NV12 surfaces (current / past ref / future ref), and a
 * dummy coded buffer used only to host the status buffer.
 *
 * Jumps to failed_allocation (outside this view) and returns
 * VA_STATUS_ERROR_ALLOCATION_FAILED on any allocation failure.
 */
8792 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8793 struct encode_state *encode_state,
8794 struct intel_encoder_context *encoder_context)
8796 struct i965_driver_data *i965 = i965_driver_data(ctx);
8797 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8798 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8799 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8800 unsigned int width = 0;
8801 unsigned int height = 0;
8802 unsigned int size = 0;
8803 int allocate_flag = 1;
8805 /* 4x MEMV data buffer */
8806 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8807 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8808 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8809 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8810 &avc_ctx->s4x_memv_data_buffer,
8813 "4x MEMV data buffer");
8815 goto failed_allocation;
8816 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8818 /* Output DISTORTION surface from 4x ME */
8819 width = generic_state->downscaled_width_4x_in_mb * 8;
8820 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8821 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8822 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8823 &avc_ctx->s4x_memv_distortion_buffer,
8826 "4x MEMV distortion buffer");
8828 goto failed_allocation;
8829 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8831 /* output BRC DISTORTION surface from 4x ME */
8832 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8833 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8834 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8835 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8836 &avc_ctx->res_brc_dist_data_surface,
8839 "brc dist data buffer");
8841 goto failed_allocation;
8842 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8845 /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
8846 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
/* 16 dwords per QP level across the full AVC QP range. */
8847 size = 16 * AVC_QP_MAX * 4;
8848 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8849 &avc_ctx->res_mbbrc_const_data_buffer,
8850 ALIGN(size, 0x1000),
8851 "mbbrc const data buffer");
8853 goto failed_allocation;
8854 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8856 /* 4x downscaled surface */
/* Created once and reused across frames (guarded by the NULL check). */
8857 if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8858 i965_CreateSurfaces(ctx,
8859 generic_state->frame_width_4x,
8860 generic_state->frame_height_4x,
8861 VA_RT_FORMAT_YUV420,
8863 &avc_ctx->preenc_scaled_4x_surface_id);
8864 avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8865 if (!avc_ctx->preenc_scaled_4x_surface_obj)
8866 goto failed_allocation;
8867 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8868 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8871 /* 4x downscaled past ref surface */
8872 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8873 i965_CreateSurfaces(ctx,
8874 generic_state->frame_width_4x,
8875 generic_state->frame_height_4x,
8876 VA_RT_FORMAT_YUV420,
8878 &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8879 avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8880 SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8881 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8882 goto failed_allocation;
8883 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8884 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8887 /* 4x downscaled future ref surface */
8888 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8889 i965_CreateSurfaces(ctx,
8890 generic_state->frame_width_4x,
8891 generic_state->frame_height_4x,
8892 VA_RT_FORMAT_YUV420,
8894 &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8895 avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8896 SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8897 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8898 goto failed_allocation;
8899 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8900 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8903 /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8904 * the generic AVC Encdoe codepath which allocate status buffer as extension
8906 if (!avc_ctx->status_buffer.bo) {
8908 generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8909 size += I965_CODEDBUFFER_HEADER_SIZE;
8911 avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8912 "Dummy Coded Buffer",
8916 return VA_STATUS_SUCCESS;
8919 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Run the PreEnc kernel sequence for one frame: 4x downscale the
 * current picture (and past/future references when present), run 4x
 * HME if enabled, then run the preproc kernel that writes the MV and
 * statistics outputs requested by the app.
 *
 * FIX(review): removed a stray double semicolon after the
 * stat_param_h264 initializer; no behavior change.
 */
8924 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8925 struct encode_state *encode_state,
8926 struct intel_encoder_context *encoder_context)
8928 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8929 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8930 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8931 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
8932 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8934 /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8935 * to avoid repeated scaling of same surfaces */
8938 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8939 INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8940 if (stat_param->num_past_references > 0) {
8941 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8942 INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8944 if (stat_param->num_future_references > 0) {
8945 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8946 INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
/* 4x ME only; PreEnc does not use the 16x/32x levels. */
8950 if (generic_state->hme_enabled) {
8951 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8954 /* preproc kernel */
8955 if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8956 gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8959 return VA_STATUS_SUCCESS;
/*
 * Top-level PreEnc pipeline entry: update/validate PreEnc parameters,
 * allocate internal resources, then run the PreEnc kernels. Bails out
 * with the failing VAStatus at each stage.
 */
8963 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8965 struct encode_state *encode_state,
8966 struct intel_encoder_context *encoder_context)
8970 va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8971 if (va_status != VA_STATUS_SUCCESS)
8974 va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8975 if (va_status != VA_STATUS_SUCCESS)
8978 va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8979 if (va_status != VA_STATUS_SUCCESS)
8982 return VA_STATUS_SUCCESS;
/*
 * Destroy the VME context: tear down the GPE kernels and free the
 * per-context state structs.
 *
 * NOTE(review): only free(generic_state) is visible in this listing;
 * the frees of generic_ctx/avc_ctx/avc_state and the NULL guard fall
 * in the extraction gaps — presumably they are freed in the missing
 * lines; confirm against the full source.
 */
8986 gen9_avc_vme_context_destroy(void * context)
8988 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8989 struct generic_encoder_context *generic_ctx;
8990 struct i965_avc_encoder_context *avc_ctx;
8991 struct generic_enc_codec_state *generic_state;
8992 struct avc_enc_state *avc_state;
8997 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8998 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8999 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9000 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
9002 gen9_avc_kernel_destroy(vme_context);
9006 free(generic_state);
/*
 * Gen8 kernel setup: load the scaling/BRC/ME/MbEnc/SFD kernels and
 * install the gen8-specific curbe callbacks (gen8 4x scaling, ME,
 * MbEnc, BRC frame update) while reusing the gen9 surface-send
 * callbacks and the remaining gen9 curbe setters.
 */
9014 gen8_avc_kernel_init(VADriverContextP ctx,
9015 struct intel_encoder_context *encoder_context)
9017 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9018 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9019 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9020 int fei_enabled = encoder_context->fei_enabled;
9022 generic_ctx->get_kernel_header_and_size = fei_enabled ?
9023 intel_avc_fei_get_kernel_header_and_size :
9024 intel_avc_get_kernel_header_and_size ;
9025 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9026 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9027 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9028 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9029 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9032 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9033 generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9034 generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9035 generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9036 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9037 generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9038 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9040 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9041 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9042 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9043 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9044 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9045 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
/*
 * Gen9+ kernel setup, branching on encode mode:
 *  - plain encode: full kernel set (scaling/BRC/ME/MbEnc/WP/SFD) with
 *    the 4x-scaling curbe chosen per platform (gen9 for SKL/BXT,
 *    gen95 for KBL/GLK/Gen10);
 *  - FEI encode: only MbEnc, with FEI-specific curbe/surface hooks;
 *  - PreEnc: scaling + ME + preproc with PreEnc-specific hooks.
 */
9048 gen9_avc_kernel_init(VADriverContextP ctx,
9049 struct intel_encoder_context *encoder_context)
9051 struct i965_driver_data *i965 = i965_driver_data(ctx);
9052 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9053 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9054 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9055 int fei_enabled = encoder_context->fei_enabled;
9056 int preenc_enabled = encoder_context->preenc_enabled;
/* FEI and PreEnc share the FEI kernel binary layout. */
9058 generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
9059 intel_avc_fei_get_kernel_header_and_size :
9060 intel_avc_get_kernel_header_and_size ;
9062 if (!fei_enabled && !preenc_enabled) {
9063 /* generic AVC Encoder */
9064 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9065 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9066 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9067 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9068 encoder_context->fei_enabled);
9069 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
9070 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9073 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9074 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9075 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
9076 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
9077 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9078 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
9079 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
9080 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9081 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
9083 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9084 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9085 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9086 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9087 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9088 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
9089 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9090 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
/* Platform-specific override of the 4x scaling curbe setter. */
9092 if (IS_SKL(i965->intel.device_info) ||
9093 IS_BXT(i965->intel.device_info))
9094 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9095 else if (IS_KBL(i965->intel.device_info) ||
9096 IS_GEN10(i965->intel.device_info) ||
9097 IS_GLK(i965->intel.device_info))
9098 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9100 } else if (fei_enabled) {
9101 /* FEI AVC Encoding */
9102 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9103 encoder_context->fei_enabled);
9104 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
9105 generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
9108 /* PreEnc for AVC */
9109 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
9110 encoder_context->preenc_enabled);
9111 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
9112 encoder_context->preenc_enabled);
9113 gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
9115 /* preenc 4x scaling uses the gen95 kernel */
9116 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9117 generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
9118 generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
9120 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9121 generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
9122 generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
9127 PAK pipeline related functions
9130 intel_avc_enc_slice_type_fixup(int slice_type);
9132 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
/* Allocate the GPE buffers needed by the PAK-only path (invoked for FEI
 * PAK-only encode): a 4KB-aligned second-level batch buffer used to write
 * the MFX AVC image state, plus -- when BRC buffers were not already set
 * up -- the BRC pre-PAK statistics output buffer.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED when an
 * i965_allocate_gpe_resource() call fails.
 * NOTE(review): this is a sampled extract; the "if (!allocate_flag)"
 * checks guarding each "goto failed_allocation" are missing from view. */
9134 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9135 struct encode_state *encode_state,
9136 struct intel_encoder_context *encoder_context)
9138 struct i965_driver_data *i965 = i965_driver_data(ctx);
9139 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9140 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9141 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9142 unsigned int size = 0;
9143 int allocate_flag = 1;
9145 /*second level batch buffer for image state write when cqp etc*/
/* Free any previous instance before (re)allocating; size is padded to 4KB. */
9146 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9147 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9148 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9149 &avc_ctx->res_image_state_batch_buffer_2nd_level,
9150 ALIGN(size, 0x1000),
9151 "second levle batch (image state write) buffer");
/* NOTE(review): "levle" typo is inside a runtime debug name string,
 * so it is intentionally left untouched in this documentation pass. */
9153 goto failed_allocation;
/* BRC statistics buffer is only (re)allocated until BRC marks it done. */
9155 if (!generic_state->brc_allocated) {
9156 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9158 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9159 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9160 ALIGN(size, 0x1000),
9161 "brc pak statistic buffer");
9163 goto failed_allocation;
9166 return VA_STATUS_SUCCESS;
9169 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Emit MFX_PIPE_MODE_SELECT (5 DWs) putting the MFX engine into AVC
 * encode mode.  MB-statistics stream-out is enabled on every PAK pass
 * except the last (multi-pass BRC reads it back), and the pre-/post-
 * deblocking output selection simply follows which reconstruction
 * buffer was allocated. */
9173 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
9174 struct encode_state *encode_state,
9175 struct intel_encoder_context *encoder_context)
9177 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9178 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9179 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9180 struct intel_batchbuffer *batch = encoder_context->base.batch;
9182 BEGIN_BCS_BATCH(batch, 5);
9184 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: codec / mode select flags. */
9185 OUT_BCS_BATCH(batch,
9187 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
9188 (MFD_MODE_VLD << 15) |
9189 (0 << 13) | /* Non-VDEnc mode is 0*/
9190 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
9191 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
9192 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
9193 (0 << 7) | /* Scaled surface enable */
9194 (0 << 6) | /* Frame statistics stream out enable */
9195 (0 << 5) | /* not in stitch mode */
9196 (1 << 4) | /* encoding mode */
9197 (MFX_FORMAT_AVC << 0));
/* DW2: clock-gating and error-handling policy (all defaults). */
9198 OUT_BCS_BATCH(batch,
9199 (0 << 7) | /* expand NOA bus flag */
9200 (0 << 6) | /* disable slice-level clock gating */
9201 (0 << 5) | /* disable clock gating for NOA */
9202 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
9203 (0 << 3) | /* terminate if AVC mbdata error occurs */
9204 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3-4: reserved, written as zero for encode. */
9207 OUT_BCS_BATCH(batch, 0);
9208 OUT_BCS_BATCH(batch, 0);
9210 ADVANCE_BCS_BATCH(batch);
9214 gen9_mfc_avc_surface_state(VADriverContextP ctx,
9215 struct intel_encoder_context *encoder_context,
9216 struct i965_gpe_resource *gpe_resource,
9219 struct intel_batchbuffer *batch = encoder_context->base.batch;
9221 BEGIN_BCS_BATCH(batch, 6);
9223 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
9224 OUT_BCS_BATCH(batch, id);
9225 OUT_BCS_BATCH(batch,
9226 ((gpe_resource->height - 1) << 18) |
9227 ((gpe_resource->width - 1) << 4));
9228 OUT_BCS_BATCH(batch,
9229 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
9230 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
9231 ((gpe_resource->pitch - 1) << 3) | /* pitch */
9232 (0 << 2) | /* must be 0 for interleave U/V */
9233 (1 << 1) | /* must be tiled */
9234 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
9235 OUT_BCS_BATCH(batch,
9236 (0 << 16) | /* must be 0 for interleave U/V */
9237 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9238 OUT_BCS_BATCH(batch,
9239 (0 << 16) | /* must be 0 for interleave U/V */
9240 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9242 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE: programs every frame-level buffer address
 * the AVC PAK uses -- pre/post deblocking outputs, the raw input surface,
 * the MB-status stream-out buffer, the row-store scratch buffers and the
 * 16 reference picture entries.  Base command length is 65 DWs; on GEN10
 * three extra trailing DWs are emitted.
 * NOTE(review): sampled extract -- the body of the first GEN10 check
 * (presumably "cmd_len = 68;") and the declaration of loop index "i"
 * are missing from this view; confirm against the full file. */
9246 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9248 struct i965_driver_data *i965 = i965_driver_data(ctx);
9249 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9250 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9251 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9252 struct intel_batchbuffer *batch = encoder_context->base.batch;
9254 unsigned int cmd_len = 65;
9256 if (IS_GEN10(i965->intel.device_info))
9259 BEGIN_BCS_BATCH(batch, cmd_len);
9261 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));
9263 /* the DW1-3 is for pre_deblocking */
9264 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9266 /* the DW4-6 is for the post_deblocking */
9267 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9269 /* the DW7-9 is for the uncompressed_picture */
9270 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
9272 /* the DW10-12 is for PAK information (write) */
9273 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
9275 /* the DW13-15 is for the intra_row_store_scratch */
9276 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9278 /* the DW16-18 is for the deblocking filter */
9279 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9281 /* the DW 19-50 is for Reference pictures*/
9282 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
9283 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
9286 /* DW 51, reference picture attributes */
9287 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9289 /* The DW 52-54 is for PAK information (read) */
9290 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
9292 /* the DW 55-57 is the ILDB buffer */
9293 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9295 /* the DW 58-60 is the second ILDB buffer */
9296 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9298 /* DW 61, memory compress enable & mode */
9299 OUT_BCS_BATCH(batch, 0);
9301 /* the DW 62-64 is the buffer */
9302 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
/* GEN10 lengthened the command; pad with zero DWs 65-67. */
9305 if (IS_GEN10(i965->intel.device_info)) {
9306 OUT_BCS_BATCH(batch, 0);
9307 OUT_BCS_BATCH(batch, 0);
9308 OUT_BCS_BATCH(batch, 0);
9311 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): programs the indirect object
 * bases used by the PAK -- the per-MB MV data read from the reconstructed
 * surface's private MV buffer (sized w_mb * h_mb * 32 * 4 bytes), and the
 * PAK-BSE output window inside the compressed bitstream buffer.
 * NOTE(review): sampled extract -- the early-return body after the
 * obj_surface/private_data NULL check is missing from this view. */
9315 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
9316 struct encode_state *encode_state,
9317 struct intel_encoder_context *encoder_context)
9319 struct i965_driver_data *i965 = i965_driver_data(ctx);
9320 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9321 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9322 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9323 struct intel_batchbuffer *batch = encoder_context->base.batch;
9324 struct object_surface *obj_surface;
9325 struct gen9_surface_avc *avc_priv_surface;
9326 unsigned int size = 0;
9327 unsigned int w_mb = generic_state->frame_width_in_mbs;
9328 unsigned int h_mb = generic_state->frame_height_in_mbs;
9330 obj_surface = encode_state->reconstructed_object;
/* The MV buffer lives in the reconstructed surface's private data. */
9332 if (!obj_surface || !obj_surface->private_data)
9334 avc_priv_surface = obj_surface->private_data;
9336 BEGIN_BCS_BATCH(batch, 26);
9338 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
9339 /* The DW1-5 is for the MFX indirect bistream offset */
9340 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9341 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9343 /* the DW6-10 is for MFX Indirect MV Object Base Address */
9344 size = w_mb * h_mb * 32 * 4;
9345 OUT_BUFFER_3DW(batch,
9346 avc_priv_surface->res_mv_data_surface.bo,
9349 i965->intel.mocs_state);
/* Upper bound of the MV object, 4KB aligned. */
9350 OUT_BUFFER_2DW(batch,
9351 avc_priv_surface->res_mv_data_surface.bo,
9353 ALIGN(size, 0x1000));
9355 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
9356 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9357 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9359 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
9360 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9361 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9363 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
9364 * Note: an offset is specified in MFX_AVC_SLICE_STATE
9366 OUT_BUFFER_3DW(batch,
9367 generic_ctx->compressed_bitstream.res.bo,
9370 i965->intel.mocs_state);
9371 OUT_BUFFER_2DW(batch,
9372 generic_ctx->compressed_bitstream.res.bo,
9374 generic_ctx->compressed_bitstream.end_offset);
9376 ADVANCE_BCS_BATCH(batch);
9380 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9382 struct i965_driver_data *i965 = i965_driver_data(ctx);
9383 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9384 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9385 struct intel_batchbuffer *batch = encoder_context->base.batch;
9387 BEGIN_BCS_BATCH(batch, 10);
9389 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9391 /* The DW1-3 is for bsd/mpc row store scratch buffer */
9392 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9394 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9395 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9397 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9398 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9400 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): direct-MV (DMV) buffer
 * addresses for the reference frames and the current frame, followed by
 * the top-field POC table used for temporal direct-mode scaling.
 * NOTE(review): sampled extract -- the relocation delta arguments, the
 * else-branch braces of the per-reference loop and some surrounding
 * lines are missing from this view. */
9404 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
9405 struct intel_encoder_context *encoder_context)
9407 struct i965_driver_data *i965 = i965_driver_data(ctx);
9408 struct intel_batchbuffer *batch = encoder_context->base.batch;
9409 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9410 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9411 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9415 BEGIN_BCS_BATCH(batch, 71);
9417 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
9419 /* Reference frames and Current frames */
9420 /* the DW1-32 is for the direct MV for reference */
/* One 64-bit address (2 DWs) per reference DMV buffer; zero when absent. */
9421 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
9422 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
9423 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
9424 I915_GEM_DOMAIN_INSTRUCTION, 0,
9427 OUT_BCS_BATCH(batch, 0);
9428 OUT_BCS_BATCH(batch, 0);
/* DW33: memory attributes (MOCS) for the reference DMV buffers. */
9432 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9434 /* the DW34-36 is the MV for the current frame */
9435 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
9436 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
9439 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: 32 entries for references, then the current frame's two. */
9442 for (i = 0; i < 32; i++) {
9443 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
9445 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
9446 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
9448 ADVANCE_BCS_BATCH(batch);
9452 gen9_mfc_qm_state(VADriverContextP ctx,
9454 const unsigned int *qm,
9456 struct intel_encoder_context *encoder_context)
9458 struct intel_batchbuffer *batch = encoder_context->base.batch;
9459 unsigned int qm_buffer[16];
9461 assert(qm_length <= 16);
9462 assert(sizeof(*qm) == 4);
9463 memset(qm_buffer, 0, 16 * 4);
9464 memcpy(qm_buffer, qm, qm_length * 4);
9466 BEGIN_BCS_BATCH(batch, 18);
9467 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9468 OUT_BCS_BATCH(batch, qm_type << 0);
9469 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9470 ADVANCE_BCS_BATCH(batch);
/* Load the four AVC quantizer matrices (4x4 intra/inter, 8x8 intra/inter)
 * for the PAK.  When neither the SPS nor the PPS carries scaling lists,
 * the flat default matrix (qm_flat) is used for all four; otherwise the
 * lists come from the application's VAIQMatrixBufferH264.  Each 4x4 load
 * is 12 DWs (three 16-byte lists), each 8x8 load is 16 DWs. */
9474 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9475 struct encode_state *encode_state,
9476 struct intel_encoder_context *encoder_context)
9478 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9479 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9480 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9481 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9484 const unsigned int *qm_4x4_intra;
9485 const unsigned int *qm_4x4_inter;
9486 const unsigned int *qm_8x8_intra;
9487 const unsigned int *qm_8x8_inter;
9489 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9490 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9491 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
/* Application-supplied scaling lists: 4x4 intra block starts at list 0,
 * 4x4 inter at list 3; 8x8 intra/inter are lists 0 and 1. */
9493 VAIQMatrixBufferH264 *qm;
9494 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9495 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9496 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9497 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9498 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9499 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
9502 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9503 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9504 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9505 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
9509 gen9_mfc_fqm_state(VADriverContextP ctx,
9511 const unsigned int *fqm,
9513 struct intel_encoder_context *encoder_context)
9515 struct intel_batchbuffer *batch = encoder_context->base.batch;
9516 unsigned int fqm_buffer[32];
9518 assert(fqm_length <= 32);
9519 assert(sizeof(*fqm) == 4);
9520 memset(fqm_buffer, 0, 32 * 4);
9521 memcpy(fqm_buffer, fqm, fqm_length * 4);
9523 BEGIN_BCS_BATCH(batch, 34);
9524 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9525 OUT_BCS_BATCH(batch, fqm_type << 0);
9526 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9527 ADVANCE_BCS_BATCH(batch);
/* Convert an 8-bit quantizer matrix into the hardware's forward-quantizer
 * form: transpose the len x len matrix while replacing each entry q with
 * its fixed-point reciprocal (1 << 16) / q, stored as uint16_t. */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            /* A zero entry would divide by zero; valid AVC scaling-list
             * entries are 1..255. */
            assert(q);
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
/* Load the four AVC forward-quantizer matrices for the PAK.  Flat default
 * tables (fqm_flat) are used unless SPS/PPS scaling lists are present, in
 * which case each application list is converted (transposed reciprocals)
 * via gen9_mfc_fill_fqm before being sent with gen9_mfc_fqm_state.
 * NOTE(review): sampled extract -- the declarations of the local "fqm"
 * staging array and loop index "i" are missing from this view. */
9542 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
9543 struct encode_state *encode_state,
9544 struct intel_encoder_context *encoder_context)
9546 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9547 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9548 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9549 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9551 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9552 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9553 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
9554 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
9555 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
9556 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
9560 VAIQMatrixBufferH264 *qm;
9561 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9562 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Lists 0-2: 4x4 intra (Y/Cb/Cr); each 4x4 list occupies 16 uint16 entries. */
9564 for (i = 0; i < 3; i++)
9565 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
9566 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* Lists 3-5: 4x4 inter (Y/Cb/Cr). */
9568 for (i = 3; i < 6; i++)
9569 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
9570 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
/* 8x8 intra (list 0) and inter (list 1). */
9572 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
9573 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
9575 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
9576 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emit one MFX_INSERT_OBJECT command carrying raw header/bitstream bytes
 * (headers, AUD, slice headers) into the output stream.
 *   lenght_in_dws        - payload size in DWs (note: "lenght" typo is in
 *                          the existing parameter name; kept as-is since
 *                          the prototype is shared across this file)
 *   data_bits_in_last_dw - valid bits in the final DW; 0 means all 32
 *   skip_emul_byte_count - leading bytes exempt from emulation prevention
 *   emulation_flag       - let hardware insert emulation-prevention bytes
 *   slice_header_indicator - marks the payload as a slice header so the
 *                          hardware can apply slice-header processing */
9581 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9582 struct intel_encoder_context *encoder_context,
9583 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9584 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9585 int slice_header_indicator,
9586 struct intel_batchbuffer *batch)
/* A zero bit count means the last DW is fully used. */
9588 if (data_bits_in_last_dw == 0)
9589 data_bits_in_last_dw = 32;
9591 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9593 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9594 OUT_BCS_BATCH(batch,
9595 (0 << 16) | /* always start at offset 0 */
9596 (slice_header_indicator << 14) |
9597 (data_bits_in_last_dw << 8) |
9598 (skip_emul_byte_count << 4) |
9599 (!!emulation_flag << 3) |
9600 ((!!is_last_header) << 2) |
9601 ((!!is_end_of_slice) << 1) |
9602 (0 << 0)); /* check this flag */
9603 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9605 ADVANCE_BCS_BATCH(batch);
/* Scan the packed raw-data headers attached to slice 0 and emit only the
 * Access Unit Delimiter NAL (AVC_NAL_DELIMITER), so the AUD appears in the
 * stream before SPS/PPS/SEI.  Non-AUD raw data is emitted later by
 * gen9_mfc_avc_insert_slice_packed_data().
 * NOTE(review): sampled extract -- several argument lines of the
 * gen9_mfc_avc_insert_object() call and the loop's closing lines are
 * missing from this view. */
9609 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
9610 struct encode_state *encode_state,
9611 struct intel_encoder_context *encoder_context,
9612 struct intel_batchbuffer *batch)
9614 VAEncPackedHeaderParameterBuffer *param = NULL;
9615 unsigned int length_in_bits;
9616 unsigned int *header_data = NULL;
9617 unsigned char *nal_type = NULL;
9618 int count, i, start_index;
9620 count = encode_state->slice_rawdata_count[0];
9621 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
9623 for (i = 0; i < count; i++) {
9624 unsigned int skip_emul_byte_cnt;
9626 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9627 nal_type = (unsigned char *)header_data;
9629 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
/* Only raw-data packed buffers can carry an AUD. */
9630 if (param->type != VAEncPackedHeaderRawData)
9633 length_in_bits = param->bit_length;
9635 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* The NAL type byte follows the start code; low 5 bits select the type. */
9637 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
9638 gen9_mfc_avc_insert_object(ctx,
9641 ALIGN(length_in_bits, 32) >> 5,
9642 length_in_bits & 0x1f,
9646 !param->has_emulation_bytes,
/* Emit the packed data attached to one slice: first every raw-data header
 * except AUD (already emitted) and the slice header itself, then the slice
 * header last -- either the application's packed slice header, or one the
 * driver builds with build_avc_slice_header() when none was supplied.
 * NOTE(review): sampled extract -- the slice_index parameter line, several
 * call-argument lines and closing braces are missing from this view. */
9655 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
9656 struct encode_state *encode_state,
9657 struct intel_encoder_context *encoder_context,
9659 struct intel_batchbuffer *batch)
9661 VAEncPackedHeaderParameterBuffer *param = NULL;
9662 unsigned int length_in_bits;
9663 unsigned int *header_data = NULL;
9664 int count, i, start_index;
9665 int slice_header_index;
9666 unsigned char *nal_type = NULL;
/* -1 marks "no packed slice header; driver must build one". */
9668 if (encode_state->slice_header_index[slice_index] == 0)
9669 slice_header_index = -1;
9671 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9673 count = encode_state->slice_rawdata_count[slice_index];
9674 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9676 for (i = 0; i < count; i++) {
9677 unsigned int skip_emul_byte_cnt;
9679 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9680 nal_type = (unsigned char *)header_data;
9682 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
9684 length_in_bits = param->bit_length;
9686 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9688 /* skip the slice header packed data type as it is lastly inserted */
9689 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
9692 /* as the slice header is still required, the last header flag is set to
9695 gen9_mfc_avc_insert_object(ctx,
9698 ALIGN(length_in_bits, 32) >> 5,
9699 length_in_bits & 0x1f,
9703 !param->has_emulation_bytes,
9708 if (slice_header_index == -1) {
9709 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
9710 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
9711 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
9712 unsigned char *slice_header = NULL;
9713 int slice_header_length_in_bits = 0;
9715 /* No slice header data is passed. And the driver needs to generate it */
9716 /* For the Normal H264 */
9717 slice_header_length_in_bits = build_avc_slice_header(seq_param,
9721 gen9_mfc_avc_insert_object(ctx,
9723 (unsigned int *)slice_header,
9724 ALIGN(slice_header_length_in_bits, 32) >> 5,
9725 slice_header_length_in_bits & 0x1f,
9726 5, /* first 5 bytes are start code + nal unit type */
/* Application-supplied packed slice header path. */
9733 unsigned int skip_emul_byte_cnt;
9735 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
9737 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
9738 length_in_bits = param->bit_length;
9740 /* as the slice header is the last header data for one slice,
9741 * the last header flag is set to one.
9743 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9745 gen9_mfc_avc_insert_object(ctx,
9748 ALIGN(length_in_bits, 32) >> 5,
9749 length_in_bits & 0x1f,
9753 !param->has_emulation_bytes,
/* Emit all stream headers for one slice.  For slice 0 this inserts, in
 * order: AUD (if packed), packed SPS, packed PPS, packed SEI, and -- for
 * CBR -- presumably driver-generated SEI (branch body not visible here).
 * Every slice then gets its packed raw data and slice header via
 * gen9_mfc_avc_insert_slice_packed_data().
 * NOTE(review): the function name's "inset" (vs "insert") typo is kept --
 * callers elsewhere in this file reference it by this spelling.
 * NOTE(review): sampled extract -- the slice_index parameter line and
 * several call-argument lines are missing from this view. */
9762 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9763 struct encode_state *encode_state,
9764 struct intel_encoder_context *encoder_context,
9765 VAEncSliceParameterBufferH264 *slice_param,
9767 struct intel_batchbuffer *batch)
9769 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9770 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9771 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9772 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9773 unsigned int skip_emul_byte_cnt;
/* Sequence-level headers are only emitted ahead of the first slice. */
9775 if (slice_index == 0) {
9777 /* if AUD exist and insert it firstly */
9778 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9780 if (encode_state->packed_header_data[idx]) {
9781 VAEncPackedHeaderParameterBuffer *param = NULL;
9782 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9783 unsigned int length_in_bits;
9785 assert(encode_state->packed_header_param[idx]);
9786 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9787 length_in_bits = param->bit_length;
9789 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9790 gen9_mfc_avc_insert_object(ctx,
9793 ALIGN(length_in_bits, 32) >> 5,
9794 length_in_bits & 0x1f,
9798 !param->has_emulation_bytes,
/* Packed PPS, same emission pattern as the SPS above. */
9803 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9805 if (encode_state->packed_header_data[idx]) {
9806 VAEncPackedHeaderParameterBuffer *param = NULL;
9807 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9808 unsigned int length_in_bits;
9810 assert(encode_state->packed_header_param[idx]);
9811 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9812 length_in_bits = param->bit_length;
9814 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9816 gen9_mfc_avc_insert_object(ctx,
9819 ALIGN(length_in_bits, 32) >> 5,
9820 length_in_bits & 0x1f,
9824 !param->has_emulation_bytes,
/* Packed SEI; when absent under CBR, a fallback branch follows. */
9829 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9831 if (encode_state->packed_header_data[idx]) {
9832 VAEncPackedHeaderParameterBuffer *param = NULL;
9833 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9834 unsigned int length_in_bits;
9836 assert(encode_state->packed_header_param[idx]);
9837 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9838 length_in_bits = param->bit_length;
9840 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9841 gen9_mfc_avc_insert_object(ctx,
9844 ALIGN(length_in_bits, 32) >> 5,
9845 length_in_bits & 0x1f,
9849 !param->has_emulation_bytes,
9852 } else if (internal_rate_mode == VA_RC_CBR) {
/* Per-slice raw data + slice header are always emitted last. */
9857 gen9_mfc_avc_insert_slice_packed_data(ctx,
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, weighted-prediction setup, slice geometry (current and
 * next slice MB positions), rate-control flags for multi-pass BRC, the
 * bitstream output offset, QP clamps and rounding controls.
 * NOTE(review): sampled extract -- the computations of max_qp_n/max_qp_p,
 * grow/shrink and the correct[] values (between inner lines 9930-9946),
 * plus assorted braces, are missing from this view; comments on those
 * fields below are hedged accordingly. */
9865 gen9_mfc_avc_slice_state(VADriverContextP ctx,
9866 struct encode_state *encode_state,
9867 struct intel_encoder_context *encoder_context,
9868 VAEncPictureParameterBufferH264 *pic_param,
9869 VAEncSliceParameterBufferH264 *slice_param,
9870 VAEncSliceParameterBufferH264 *next_slice_param,
9871 struct intel_batchbuffer *batch)
9873 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9874 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9875 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9876 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9877 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
9878 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
9879 unsigned char correct[6], grow, shrink;
9880 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
9881 int max_qp_n, max_qp_p;
9883 int weighted_pred_idc = 0;
9884 int num_ref_l0 = 0, num_ref_l1 = 0;
9885 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
9886 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
9887 unsigned int rc_panic_enable = 0;
9888 unsigned int rate_control_counter_enable = 0;
9889 unsigned int rounding_value = 0;
9890 unsigned int rounding_inter_enable = 0;
/* Convert the slice's first MB address into (x, y) MB coordinates. */
9892 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9893 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
9895 if (next_slice_param) {
9896 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9897 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice of the frame: "next" position is one row past the bottom. */
9899 next_slice_hor_pos = 0;
9900 next_slice_ver_pos = generic_state->frame_height_in_mbs;
/* Per-slice-type reference counts and weighted-prediction controls. */
9903 if (slice_type == SLICE_TYPE_I) {
9904 luma_log2_weight_denom = 0;
9905 chroma_log2_weight_denom = 0;
9906 } else if (slice_type == SLICE_TYPE_P) {
9907 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
9908 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9909 rounding_inter_enable = avc_state->rounding_inter_enable;
9910 rounding_value = avc_state->rounding_value;
9912 if (slice_param->num_ref_idx_active_override_flag)
9913 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9914 } else if (slice_type == SLICE_TYPE_B) {
9915 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
9916 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9917 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
9918 rounding_inter_enable = avc_state->rounding_inter_enable;
9919 rounding_value = avc_state->rounding_value;
9921 if (slice_param->num_ref_idx_active_override_flag) {
9922 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9923 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
9926 if (weighted_pred_idc == 2) {
9927 /* 8.4.3 - Derivation process for prediction weights (8-279) */
9928 luma_log2_weight_denom = 5;
9929 chroma_log2_weight_denom = 5;
/* The RC counter is read back on every PAK pass after the first; panic
 * mode only engages on the final pass of a (non-CQP) BRC encode. */
9938 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
9939 rc_panic_enable = (avc_state->rc_panic_enable &&
9940 (!avc_state->min_max_qp_enable) &&
9941 (encoder_context->rate_control_mode != VA_RC_CQP) &&
9942 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
/* NOTE(review): the loop body initializing correct[i] is not visible
 * in this extract -- presumably zeroing; confirm against full file. */
9944 for (i = 0; i < 6; i++)
9947 BEGIN_BCS_BATCH(batch, 11);
9949 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
9950 OUT_BCS_BATCH(batch, slice_type);
9951 OUT_BCS_BATCH(batch,
9952 (num_ref_l1 << 24) |
9953 (num_ref_l0 << 16) |
9954 (chroma_log2_weight_denom << 8) |
9955 (luma_log2_weight_denom << 0));
9956 OUT_BCS_BATCH(batch,
9957 (weighted_pred_idc << 30) |
9958 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
9959 (slice_param->disable_deblocking_filter_idc << 27) |
9960 (slice_param->cabac_init_idc << 24) |
9962 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
9963 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
/* DW4-5: current slice start and next slice start, in MB units. */
9965 OUT_BCS_BATCH(batch,
9966 slice_ver_pos << 24 |
9967 slice_hor_pos << 16 |
9968 slice_param->macroblock_address);
9969 OUT_BCS_BATCH(batch,
9970 next_slice_ver_pos << 16 |
9971 next_slice_hor_pos);
9973 OUT_BCS_BATCH(batch,
9974 (rate_control_counter_enable << 31) |
9975 (1 << 30) | /* ResetRateControlCounter */
9976 (2 << 28) | /* Loose Rate Control */
9977 (0 << 24) | /* RC Stable Tolerance */
9978 (rc_panic_enable << 23) | /* RC Panic Enable */
9979 (1 << 22) | /* CBP mode */
9980 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
9981 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
9982 (!next_slice_param << 19) | /* Is Last Slice */
9983 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
9984 (1 << 17) | /* HeaderPresentFlag */
9985 (1 << 16) | /* SliceData PresentFlag */
9986 (0 << 15) | /* TailPresentFlag */
9987 (1 << 13) | /* RBSP NAL TYPE */
9988 (1 << 12)); /* CabacZeroWordInsertionEnable */
/* DW7: byte offset of this slice's output inside the bitstream buffer
 * (the buffer base was programmed in IND_OBJ_BASE_ADDR_STATE). */
9990 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
9992 OUT_BCS_BATCH(batch,
9993 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
9994 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
9997 OUT_BCS_BATCH(batch,
9998 (rounding_inter_enable << 31) |
9999 (rounding_value << 28) |
10002 (correct[5] << 20) |
10003 (correct[4] << 16) |
10004 (correct[3] << 12) |
10005 (correct[2] << 8) |
10006 (correct[1] << 4) |
10007 (correct[0] << 0));
10008 OUT_BCS_BATCH(batch, 0);
10010 ADVANCE_BCS_BATCH(batch);
/* Pack a single reference-picture entry byte for MFX_AVC_REF_IDX_STATE.
 * The entry is derived from the VA-API picture flags and the frame store
 * id the driver assigned to this reference surface.
 */
gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
    /* Each flag is normalized to 0/1 with the double negation. */
    unsigned int is_long_term =
        !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
    unsigned int is_top_field =
        !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
    unsigned int is_bottom_field =
        !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
    /* bit 6: long-term flag; bits above bit 0: frame store id;
     * bit 0: set only when the picture is a bottom field and not a
     * top field (bottom-field select). */
    return ((is_long_term << 6) |
            (frame_store_id << 1) |
            ((is_top_field ^ 1) & is_bottom_field));
/* Emit the MFX_AVC_REF_IDX_STATE command(s) for the current slice.
 * Builds the L0 reference list for P/B slices and additionally the L1
 * list for B slices.  Each list entry is one byte; unused entries keep
 * the 0x80 marker byte (presumably "invalid entry" per the MFX command
 * definition -- confirm against the PRM).
 */
gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           VAEncSliceParameterBufferH264 *slice_param,
                           struct intel_batchbuffer *batch)
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAPictureH264 *ref_pic;
    int i, slice_type, ref_idx_shift;
    unsigned int fwd_ref_entry;
    unsigned int bwd_ref_entry;
    /* max 4 ref frames are allowed for l0 and l1 */
    fwd_ref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    if ((slice_type == SLICE_TYPE_P) ||
        (slice_type == SLICE_TYPE_B)) {
        /* Fill one byte per L0 reference (at most 4). */
        for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
            ref_pic = &slice_param->RefPicList0[i];
            ref_idx_shift = i * 8;
            /* Clear the 0x80 marker byte before inserting the entry. */
            fwd_ref_entry &= ~(0xFF << ref_idx_shift);
            fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
    bwd_ref_entry = 0x80808080;
    if (slice_type == SLICE_TYPE_B) {
        /* Fill one byte per L1 reference (at most 4), B slices only. */
        for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
            ref_pic = &slice_param->RefPicList1[i];
            ref_idx_shift = i * 8;
            bwd_ref_entry &= ~(0xFF << ref_idx_shift);
            bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
    if ((slice_type == SLICE_TYPE_P) ||
        (slice_type == SLICE_TYPE_B)) {
        /* L0 list: one command of 10 DWs, remaining DWs padded with the
         * 0x80 "unused" marker bytes. */
        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
        OUT_BCS_BATCH(batch, 0); // L0
        OUT_BCS_BATCH(batch, fwd_ref_entry);
        for (i = 0; i < 7; i++) {
            OUT_BCS_BATCH(batch, 0x80808080);
        ADVANCE_BCS_BATCH(batch);
    if (slice_type == SLICE_TYPE_B) {
        /* L1 list: same layout, list select DW set to 1. */
        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
        OUT_BCS_BATCH(batch, 1); //Select L1
        OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
        for (i = 0; i < 7; i++) {
            OUT_BCS_BATCH(batch, 0x80808080);
        ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is
 * enabled: the L0 table for P slices with weighted_pred_flag set, and
 * both L0 and L1 tables for B slices with weighted_bipred_idc == 1.
 * Each table holds 32 entries of 6 shorts:
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}.
 */
gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                VAEncPictureParameterBufferH264 *pic_param,
                                VAEncSliceParameterBufferH264 *slice_param,
                                struct intel_batchbuffer *batch)
    short weightoffsets[32 * 6];
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    if (slice_type == SLICE_TYPE_P &&
        pic_param->pic_fields.bits.weighted_pred_flag == 1) {
        /* P slice: pack the L0 weight/offset table from the slice params. */
        memset(weightoffsets, 0, 32 * 6 * sizeof(short));
        for (i = 0; i < 32; i++) {
            weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
            weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
            weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
            weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
            weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
            weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
        /* 98 DWs total: header + list-select DW + 32*6 shorts (96 DWs). */
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, 0);
        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    if (slice_type == SLICE_TYPE_B &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        /* B slice with explicit bipred weights: first the L0 table... */
        memset(weightoffsets, 0, 32 * 6 * sizeof(short));
        for (i = 0; i < 32; i++) {
            weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
            weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
            weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
            weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
            weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
            weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, 0);
        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
        /* ...then the L1 table (list-select DW = 1). */
        memset(weightoffsets, 0, 32 * 6 * sizeof(short));
        for (i = 0; i < 32; i++) {
            weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
            weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
            weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
            weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
            weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
            weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, 1);
        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
/* Program the PAK commands for one slice.
 * On the first PAK pass the per-slice commands (ref idx state,
 * weight/offset state, slice state, packed headers) are recorded into a
 * reusable second-level batch buffer; later passes replay the recorded
 * commands at the remembered offset.  The slice batch and the MB code
 * buffer are then chained into the main batch as second-level batches.
 */
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int slice_offset = 0;
    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands and remember where
         * they start so later passes can replay them. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
        gen9_mfc_avc_slice_state(ctx,
        gen9_mfc_avc_inset_headers(ctx,
        /* Terminate the recorded slice segment. */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);
        slice_offset = avc_state->slice_batch_offset[slice_index];
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* 16 DWs (64 bytes) of MB code per macroblock -- the offset skips
     * the MBs belonging to earlier slices. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Slice-level PAK programming: walk every slice parameter buffer (and
 * every element within each buffer), emit the per-slice commands via
 * gen9_mfc_avc_single_slice(), and finish with an MI_FLUSH_DW that
 * invalidates the video pipeline cache.
 */
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int slice_index = 0;
    int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
    int has_tail = 0; /* check it later */
    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
        /* Look ahead to the first slice of the next parameter buffer so
         * the last slice of this group knows its successor. */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;
            gen9_mfc_avc_single_slice(ctx,
    if (is_frame_level)
    if (is_frame_level)
    /* insert a tail if required */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Picture-level PAK programming for one pass.
 * For BRC re-pak passes a conditional batch-buffer-end is emitted first
 * so the pass is skipped when the previous pass already met the image
 * status mask.  Then the MFX pipe is configured (mode select, surface
 * states, buffer addresses), the image state is chained in as a
 * second-level batch (BRC-written buffer, or a freshly generated
 * non-BRC image state), and QM/FQM/direct-mode states are emitted.
 */
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* Skip this re-pak pass if the image status mask says the
         * previous pass already succeeded. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);
        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
    if (generic_state->brc_enabled) {
        /* BRC: replay the image state the BRC kernel wrote for this
         * pass (one INTEL_AVC_IMAGE_STATE_CMD_SIZE slot per pass). */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);
/* Capture MFX status registers after PAK.
 * Flushes the pipe, then stores the bitstream byte count and image
 * status mask MMIO registers into the internal status buffer, and
 * mirrors byte counts / pass count / image status control into the
 * BRC pre-PAK statistics buffer for the next BRC pass.
 */
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;
    status_buffer = &(avc_ctx->status_buffer);
    /* Flush so the registers reflect the finished PAK work. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
    /*update the status in the pak_statistic_surface */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
    /* DW2 of the statistics buffer records how many passes completed. */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
    /* Image status control is stored per pass, starting at DW4. */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Map the VA rate-control mode to the encoder's internal rate mode and
 * pick a default quality level when the caller did not set one.
 * Only the low 7 bits of the mode are used for the mapping; the visible
 * mappings are CBR, VBR (also covering AVBR per the inline note) and
 * CQP as the remaining case.
 */
gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    switch (rate_control_mode & 0x7f) {
        generic_state->internal_rate_mode = VA_RC_CBR;
        generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
        generic_state->internal_rate_mode = VA_RC_CQP;
    /* quality_level 0 means "unset": fall back to the AVC default. */
    if (encoder_context->quality_level == 0)
        encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
/* allcate resources for pak only (fei mode) */
/* FEI PAK-only preparation: validates parameters, allocates PAK
 * resources, binds the caller-provided coded buffer / MB code / MV data
 * buffers as GPE resources, resolves the reference lists into frame
 * store ids, and initializes the per-frame status buffer.
 */
gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct gen9_surface_avc *avc_priv_surface;
    VAEncPictureParameterBufferH264 *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;
    VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
    unsigned int size = 0, i, j;
    unsigned int frame_mb_nums;
    struct object_buffer *obj_buffer = NULL;
    struct buffer_store *buffer_store = NULL;
    struct object_surface *obj_surface = NULL;
    struct avc_surface_param surface_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    unsigned char * pdata;
    gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)
    va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)
    /* Encoded bitstream ?*/
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    /* Reserve a trailing 4KB-aligned region at the end of the buffer. */
    generic_ctx->compressed_bitstream.end_offset =
        ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    /* The coded buffer doubles as the status buffer. */
    dri_bo_unreference(avc_ctx->status_buffer.bo);
    avc_ctx->status_buffer.bo = bo;
    dri_bo_reference(bo);
    /* set the internal flag to 0 to indicate the coded size is unknown */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    //frame id, it is the ref pic id in the reference_objects list.
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        /* Slice-level override takes precedence over the picture param. */
        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
    /* Resolve each L0 reference picture to its index (frame store id)
     * in encode_state->reference_objects. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;
        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;
        if (i >= avc_state->num_refs[0])
        va_pic = &slice_param->RefPicList0[i];
        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];
                obj_surface->base.id == va_pic->picture_id) {
                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;
    /* Same resolution for the L1 list. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;
        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;
        if (i >= avc_state->num_refs[1])
        va_pic = &slice_param->RefPicList1[i];
        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];
                obj_surface->base.id == va_pic->picture_id) {
                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;
    obj_surface = encode_state->reconstructed_object;
    fei_param = avc_state->fei_framectl_param;
    frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    if (va_status != VA_STATUS_SUCCESS)
    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
    avc_priv_surface = obj_surface->private_data;
    /* res_mb_code_surface for MB code */
    /* PAK only mode must have the mb_code_surface from middleware,
     * so the code shouldn't reach here without an externally provided
     * MB Code buffer */
    assert(fei_param->mb_code_data != VA_INVALID_ID);
    size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
    obj_buffer = BUFFER(fei_param->mb_code_data);
    assert(obj_buffer != NULL);
    buffer_store = obj_buffer->buffer_store;
    assert(size <= buffer_store->bo->size);
    if (avc_priv_surface->res_mb_code_surface.bo != NULL)
        i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
    i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
    /* res_mv_data_surface for MV data */
    size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
    if (fei_param->mv_data != VA_INVALID_ID) {
        obj_buffer = BUFFER(fei_param->mv_data);
        assert(obj_buffer != NULL);
        buffer_store = obj_buffer->buffer_store;
        assert(size <= buffer_store->bo->size);
        if (avc_priv_surface->res_mv_data_surface.bo != NULL)
            i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
        i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
    return VA_STATUS_SUCCESS;
/* Per-frame PAK preparation.
 * Delegates to the FEI PAK-only path when requested; otherwise scans
 * the slice parameters (to decide whether deblocking output is needed),
 * binds the reconstructed / input / reference surfaces and their
 * direct-MV buffers as GPE resources, (re)creates the second-level
 * slice batch buffer, and allocates the PAK row-store and MB-status
 * scratch buffers.
 */
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264 *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_surface_param surface_param;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size, w_mb, h_mb;
    /* FEI PAK-only mode has its own preparation path. */
    if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
        va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
        if (va_status != VA_STATUS_SUCCESS)
    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    w_mb = generic_state->frame_width_in_mbs;
    h_mb = generic_state->frame_height_in_mbs;
    /* update the parameter and check slice parameter */
    /* Deblocking output is needed if any slice enables the in-loop
     * deblocking filter (idc != 1); stop scanning once found. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));
            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
    avc_state->enable_avc_ildb = enable_avc_ildb;
    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    if (va_status != VA_STATUS_SUCCESS)
    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
    if (va_status != VA_STATUS_SUCCESS)
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    /* The last two DMV buffer slots are reserved for the current frame
     * (top/bottom). */
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
    avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
    avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
    i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
    i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
    i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
    avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    avc_priv_surface->frame_store_id = 0;
    avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
    avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
    avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
    avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
    avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
    /* The reconstructed surface receives either post- or pre-deblocking
     * output depending on whether the in-loop filter is enabled. */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    if (va_status != VA_STATUS_SUCCESS)
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        /* Two DMV buffers (top/bottom field) per reference slot. */
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;
        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
            if (va_status != VA_STATUS_SUCCESS)
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
    /* Recreate the second-level slice batch buffer sized per slice
     * count and reset the recorded per-slice offsets. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    /* PAK scratch buffers, sized from the frame dimensions in MBs. */
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;
    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;
    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;
    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;
    return VA_STATUS_SUCCESS;
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* gen9_avc_encode_picture: PAK (MFX) encode entry for one AVC frame.
 * Prepares all PAK resources, then runs one PAK pass per BRC iteration
 * on the BSD ring, reading back MFC status after every pass.
 * NOTE(review): extraction dropped some original lines here (return
 * type, `profile` parameter, braces, error-path `return`, `else`);
 * only comments are added, visible code is untouched. */
10827 gen9_avc_encode_picture(VADriverContextP ctx,
10829 struct encode_state *encode_state,
10830 struct intel_encoder_context *encoder_context)
10832 VAStatus va_status;
10833 struct i965_driver_data *i965 = i965_driver_data(ctx);
10834 struct i965_gpe_table *gpe = &i965->gpe_table;
10835 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10836 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
10837 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
10838 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* (Re)allocate and bind all per-frame PAK surfaces and buffers. */
10840 va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
/* presumably followed by `return va_status;` — line lost in extraction */
10842 if (va_status != VA_STATUS_SUCCESS)
/* Force BSD ring 0 when the part exposes a second BSD ring. */
10845 if (i965->intel.has_bsd2)
10846 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
10848 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
10849 intel_batchbuffer_emit_mi_flush(batch);
/* One PAK pass per BRC iteration (curr_pak_pass is consumed by the
 * picture/slice-level emitters below). */
10850 for (generic_state->curr_pak_pass = 0;
10851 generic_state->curr_pak_pass < generic_state->num_pak_passes;
10852 generic_state->curr_pak_pass++) {
10854 if (generic_state->curr_pak_pass == 0) {
10855 /* Initialize the avc Image Ctrl reg for the first pass, write 0 to the status/control register; is it needed in AVC? */
10856 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
10857 struct encoder_status_buffer_internal *status_buffer;
10859 status_buffer = &(avc_ctx->status_buffer);
10860 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
10861 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10862 mi_load_reg_imm.data = 0;
10863 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
/* Emit picture-level then slice-level PAK commands, then snapshot the
 * MFC status registers so the next pass / app can evaluate this one. */
10865 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
10866 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
10867 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The 2nd-level slice batch buffer is per-frame; release it now. */
10870 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10871 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10872 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10875 intel_batchbuffer_end_atomic(batch);
10876 intel_batchbuffer_flush(batch);
/* Frame bookkeeping for subsequent GOP / BRC decisions. */
10878 generic_state->seq_frame_number++;
10879 generic_state->total_frame_number++;
10880 generic_state->first_frame = 0;
10881 return VA_STATUS_SUCCESS;
/* gen9_avc_pak_pipeline: PAK dispatch by VA profile. All supported
 * H.264 profiles funnel into gen9_avc_encode_picture; anything else is
 * rejected with VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the `switch (profile)` header, `default:` label,
 * `break`s and final `return vaStatus;` were lost in extraction;
 * comments only are added, visible code is untouched. */
10885 gen9_avc_pak_pipeline(VADriverContextP ctx,
10887 struct encode_state *encode_state,
10888 struct intel_encoder_context *encoder_context)
/* Supported H.264 profiles — all share one PAK path. */
10893 case VAProfileH264ConstrainedBaseline:
10894 case VAProfileH264Main:
10895 case VAProfileH264High:
10896 case VAProfileH264MultiviewHigh:
10897 case VAProfileH264StereoHigh:
10898 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
/* default: reject non-H.264 profiles. */
10902 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/* gen9_avc_pak_context_destroy: release every GPE resource owned by the
 * shared VME/PAK context (surfaces, scratch buffers, reference and DMV
 * buffers) plus the 2nd-level slice batch buffer. The context structs
 * themselves are freed elsewhere (VME & PAK share one context).
 * NOTE(review): the `int i;` declaration, NULL-context guard and loop
 * closing braces were lost in extraction; comments only added. */
10910 gen9_avc_pak_context_destroy(void * context)
10912 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10913 struct generic_encoder_context * generic_ctx;
10914 struct i965_avc_encoder_context * avc_ctx;
10920 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10921 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Per-frame input/output surfaces. */
10924 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10925 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10926 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10927 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
/* Bitstream output and PAK row-store / status scratch buffers. */
10929 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10930 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10931 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10932 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10933 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
/* Reference frame surfaces and direct-MV (DMV) buffers. */
10935 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10936 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10939 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10940 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
/* 2nd-level slice batch buffer may persist between frames; drop it. */
10943 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10944 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10945 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/* gen9_avc_get_coded_status: report the encoded frame size to the app.
 * Reads the encoder_status block stashed in the coded buffer segment's
 * private data (filled by gen9_avc_read_mfc_status) and publishes the
 * frame byte count as the segment size.
 * Returns VA_STATUS_ERROR_INVALID_BUFFER on NULL arguments. */
10951 gen9_avc_get_coded_status(VADriverContextP ctx,
10952 struct intel_encoder_context *encoder_context,
10953 struct i965_coded_buffer_segment *coded_buf_seg)
10955 struct encoder_status *avc_encode_status;
10957 if (!encoder_context || !coded_buf_seg)
10958 return VA_STATUS_ERROR_INVALID_BUFFER;
10960 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
/* bs_byte_count_frame was captured from the MFC bitstream byte-count
 * register after the last PAK pass. */
10961 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10963 return VA_STATUS_SUCCESS;
/* gen9_avc_vme_context_init: allocate and initialize the shared VME/PAK
 * encoder context: the five context/state structs, the per-platform
 * kernel binaries, all generic- and AVC-specific default state, the
 * status-buffer offsets/MMIO mapping, and the VME pipeline hooks.
 * NOTE(review): several original lines (return type, braces, `else`
 * branches, error-path frees) were lost in extraction; comments only
 * are added here, visible code is untouched. */
10967 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10969 /* VME & PAK share the same context */
10970 struct i965_driver_data *i965 = i965_driver_data(ctx);
10971 struct encoder_vme_mfc_context * vme_context = NULL;
10972 struct generic_encoder_context * generic_ctx = NULL;
10973 struct i965_avc_encoder_context * avc_ctx = NULL;
10974 struct generic_enc_codec_state * generic_state = NULL;
10975 struct avc_enc_state * avc_state = NULL;
10976 struct encoder_status_buffer_internal *status_buffer;
10977 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
/* Allocate the five structs; any NULL sends us to the cleanup label. */
10979 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10980 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10981 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10982 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10983 avc_state = calloc(1, sizeof(struct avc_enc_state));
10985 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10986 goto allocate_structure_failed;
/* memset is redundant after calloc but harmless; kept as-is. */
10988 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10989 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10990 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10991 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10992 memset(avc_state, 0, sizeof(struct avc_enc_state));
/* Wire the sub-contexts into the encoder context. */
10994 encoder_context->vme_context = vme_context;
10995 vme_context->generic_enc_ctx = generic_ctx;
10996 vme_context->private_enc_ctx = avc_ctx;
10997 vme_context->generic_enc_state = generic_state;
10998 vme_context->private_enc_state = avc_state;
/* Select the kernel binary blob for the detected GPU generation. */
11000 if (IS_SKL(i965->intel.device_info) ||
11001 IS_BXT(i965->intel.device_info)) {
11002 if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11003 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11004 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11006 /* FEI and PreEnc operation kernels are included in
11007 * the monolithic kernel binary */
11008 generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11009 generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11011 } else if (IS_GEN8(i965->intel.device_info)) {
11012 generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11013 generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11014 } else if (IS_KBL(i965->intel.device_info) ||
11015 IS_GLK(i965->intel.device_info)) {
11016 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11017 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11018 } else if (IS_GEN10(i965->intel.device_info)) {
11019 generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11020 generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
/* Unsupported generation — bail out through the cleanup label. */
11022 goto allocate_structure_failed;
11024 /* initialize misc ? */
11025 avc_ctx->ctx = ctx;
11026 generic_ctx->use_hw_scoreboard = 1;
11027 generic_ctx->use_hw_non_stalling_scoreboard = 1;
11029 /* initialize generic state */
11031 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11032 generic_state->preset = INTEL_PRESET_RT_SPEED;
11033 generic_state->seq_frame_number = 0;
11034 generic_state->total_frame_number = 0;
11035 generic_state->frame_type = 0;
11036 generic_state->first_frame = 1;
/* Frame/downscaled dimensions are filled in later per sequence. */
11038 generic_state->frame_width_in_pixel = 0;
11039 generic_state->frame_height_in_pixel = 0;
11040 generic_state->frame_width_in_mbs = 0;
11041 generic_state->frame_height_in_mbs = 0;
11042 generic_state->frame_width_4x = 0;
11043 generic_state->frame_height_4x = 0;
11044 generic_state->frame_width_16x = 0;
11045 generic_state->frame_height_16x = 0;
11046 generic_state->frame_width_32x = 0;
11047 generic_state->downscaled_width_4x_in_mb = 0;
11048 generic_state->downscaled_height_4x_in_mb = 0;
11049 generic_state->downscaled_width_16x_in_mb = 0;
11050 generic_state->downscaled_height_16x_in_mb = 0;
11051 generic_state->downscaled_width_32x_in_mb = 0;
11052 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capability defaults: 4x and 16x supported, 32x off. */
11054 generic_state->hme_supported = 1;
11055 generic_state->b16xme_supported = 1;
11056 generic_state->b32xme_supported = 0;
11057 generic_state->hme_enabled = 0;
11058 generic_state->b16xme_enabled = 0;
11059 generic_state->b32xme_enabled = 0;
11061 if (encoder_context->fei_enabled) {
11062 /* Disabling HME in FEI encode */
11063 generic_state->hme_supported = 0;
11064 generic_state->b16xme_supported = 0;
11065 } else if (encoder_context->preenc_enabled) {
11066 /* Disabling 16x16ME in PreEnc */
11067 generic_state->b16xme_supported = 0;
11070 generic_state->brc_distortion_buffer_supported = 1;
11071 generic_state->brc_constant_buffer_supported = 0;
/* BRC defaults; refined below when a real rate-control mode is set. */
11073 generic_state->frame_rate = 30;
11074 generic_state->brc_allocated = 0;
11075 generic_state->brc_inited = 0;
11076 generic_state->brc_need_reset = 0;
11077 generic_state->is_low_delay = 0;
11078 generic_state->brc_enabled = 0;//default
11079 generic_state->internal_rate_mode = 0;
11080 generic_state->curr_pak_pass = 0;
11081 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11082 generic_state->is_first_pass = 1;
11083 generic_state->is_last_pass = 0;
11084 generic_state->mb_brc_enabled = 0; // enable mb brc
11085 generic_state->brc_roi_enable = 0;
11086 generic_state->brc_dirty_roi_enable = 0;
11087 generic_state->skip_frame_enbale = 0;
/* Bitrate/VBV parameters are provided by the app per sequence. */
11089 generic_state->target_bit_rate = 0;
11090 generic_state->max_bit_rate = 0;
11091 generic_state->min_bit_rate = 0;
11092 generic_state->init_vbv_buffer_fullness_in_bit = 0;
11093 generic_state->vbv_buffer_size_in_bit = 0;
11094 generic_state->frames_per_100s = 0;
11095 generic_state->gop_size = 0;
11096 generic_state->gop_ref_distance = 0;
11097 generic_state->brc_target_size = 0;
11098 generic_state->brc_mode = 0;
11099 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11100 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11101 generic_state->brc_init_reset_buf_size_in_bits = 0;
11102 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11103 generic_state->frames_per_window_size = 0;//default
11104 generic_state->target_percentage = 0;
11106 generic_state->avbr_curracy = 0;
11107 generic_state->avbr_convergence = 0;
11109 generic_state->num_skip_frames = 0;
11110 generic_state->size_skip_frames = 0;
11112 generic_state->num_roi = 0;
11113 generic_state->max_delta_qp = 0;
11114 generic_state->min_delta_qp = 0;
/* Any mode other than NONE/CQP turns BRC on. */
11116 if (encoder_context->rate_control_mode != VA_RC_NONE &&
11117 encoder_context->rate_control_mode != VA_RC_CQP) {
11118 generic_state->brc_enabled = 1;
11119 generic_state->brc_distortion_buffer_supported = 1;
11120 generic_state->brc_constant_buffer_supported = 1;
11121 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11123 /*avc state initialization */
11124 avc_state->mad_enable = 0;
11125 avc_state->mb_disable_skip_map_enable = 0;
11126 avc_state->sfd_enable = 1;//default
11127 avc_state->sfd_mb_enable = 1;//set it true
11128 avc_state->adaptive_search_window_enable = 1;//default
11129 avc_state->mb_qp_data_enable = 0;
11130 avc_state->intra_refresh_i_enable = 0;
11131 avc_state->min_max_qp_enable = 0;
11132 avc_state->skip_bias_adjustment_enable = 0;//default, same as skip_bias_adjustment_supported? no
11135 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11136 avc_state->ftq_skip_threshold_lut_input_enable = 0;
11137 avc_state->ftq_override = 0;
11139 avc_state->direct_bias_adjustment_enable = 0;
11140 avc_state->global_motion_bias_adjustment_enable = 0;
11141 avc_state->disable_sub_mb_partion = 0;
11142 avc_state->arbitrary_num_mbs_in_slice = 0;
11143 avc_state->adaptive_transform_decision_enable = 0;//default
11144 avc_state->skip_check_disable = 0;
11145 avc_state->tq_enable = 0;
11146 avc_state->enable_avc_ildb = 0;
11147 avc_state->mbaff_flag = 0;
11148 avc_state->enable_force_skip = 1;//default
11149 avc_state->rc_panic_enable = 1;//default
11150 avc_state->suppress_recon_enable = 1;//default
11152 avc_state->ref_pic_select_list_supported = 1;
11153 avc_state->mb_brc_supported = 1;//?,default
11154 avc_state->multi_pre_enable = 1;//default
11155 avc_state->ftq_enable = 1;//default
11156 avc_state->caf_supported = 1; //default
11157 avc_state->caf_enable = 0;
11158 avc_state->caf_disable_hd = 1;//default
11159 avc_state->skip_bias_adjustment_supported = 1;//default
11161 avc_state->adaptive_intra_scaling_enable = 1;//default
11162 avc_state->old_mode_cost_enable = 0;//default
11163 avc_state->multi_ref_qp_enable = 1;//default
11164 avc_state->weighted_ref_l0_enable = 1;//default
11165 avc_state->weighted_ref_l1_enable = 1;//default
11166 avc_state->weighted_prediction_supported = 0;
11167 avc_state->brc_split_enable = 0;
11168 avc_state->slice_level_report_supported = 0;
11170 avc_state->fbr_bypass_enable = 1;//default
11171 avc_state->field_scaling_output_interleaved = 0;
11172 avc_state->mb_variance_output_enable = 0;
11173 avc_state->mb_pixel_average_output_enable = 0;
11174 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11175 avc_state->mbenc_curbe_set_in_brc_update = 0;
11176 avc_state->rounding_inter_enable = 1; //default
11177 avc_state->adaptive_rounding_inter_enable = 1;//default
11179 avc_state->mbenc_i_frame_dist_in_use = 0;
11180 avc_state->mb_status_supported = 1; //set in initialization for gen9
11181 avc_state->mb_status_enable = 0;
11182 avc_state->mb_vproc_stats_enable = 0;
11183 avc_state->flatness_check_enable = 0;
11184 avc_state->flatness_check_supported = 1;//default
11185 avc_state->block_based_skip_enable = 0;
11186 avc_state->use_widi_mbenc_kernel = 0;
11187 avc_state->kernel_trellis_enable = 0;
11188 avc_state->generic_reserved = 0;
/* Rounding defaults: "invalid" means compute per-frame later. */
11190 avc_state->rounding_value = 0;
11191 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11192 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11193 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11194 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11195 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11196 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11197 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11198 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11199 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11201 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11202 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11203 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11205 avc_state->intra_refresh_qp_threshold = 0;
11206 avc_state->trellis_flag = 0;
11207 avc_state->hme_mv_cost_scaling_factor = 0;
11208 avc_state->slice_height = 1;
11209 avc_state->slice_num = 1;
11210 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11211 avc_state->bi_weight = 0;
11213 avc_state->lambda_table_enable = 0;
/* Per-generation overrides of the defaults above. */
11215 if (IS_GEN8(i965->intel.device_info)) {
11216 avc_state->brc_const_data_surface_width = 64;
11217 avc_state->brc_const_data_surface_height = 44;
11218 avc_state->mb_status_supported = 0;
11219 } else if (IS_SKL(i965->intel.device_info) ||
11220 IS_BXT(i965->intel.device_info)) {
11221 avc_state->brc_const_data_surface_width = 64;
11222 avc_state->brc_const_data_surface_height = 44;
11223 avc_state->brc_split_enable = 1;
11224 } else if (IS_KBL(i965->intel.device_info) ||
11225 IS_GEN10(i965->intel.device_info) ||
11226 IS_GLK(i965->intel.device_info)) {
11227 avc_state->brc_const_data_surface_width = 64;
11228 avc_state->brc_const_data_surface_height = 53;
11230 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11231 avc_state->extended_mv_cost_range_enable = 0;
11232 avc_state->reserved_g95 = 0;
11233 avc_state->mbenc_brc_buffer_size = 128;
11234 avc_state->kernel_trellis_enable = 1;
11235 avc_state->lambda_table_enable = 1;
11236 avc_state->brc_split_enable = 1;
11238 if (IS_GEN10(i965->intel.device_info))
11239 avc_state->adaptive_transform_decision_enable = 1;// CNL
11242 avc_state->num_refs[0] = 0;
11243 avc_state->num_refs[1] = 0;
11244 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11245 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11246 avc_state->tq_rounding = 0;
11247 avc_state->zero_mv_threshold = 0;
11248 avc_state->slice_second_levle_batch_buffer_in_use = 0;
11252 /* the definition of status buffer offset for Encoder */
11254 status_buffer = &avc_ctx->status_buffer;
11255 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
/* Byte offsets of each field inside the coded-buffer private data. */
11257 status_buffer->base_offset = base_offset;
11258 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11259 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11260 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11261 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11262 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11263 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
/* MMIO register offsets the PAK results are read from. */
11265 status_buffer->status_buffer_size = sizeof(struct encoder_status);
11266 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11267 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11268 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11269 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11270 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load the generation-specific media kernels. */
11272 if (IS_GEN8(i965->intel.device_info)) {
11273 gen8_avc_kernel_init(ctx, encoder_context);
11275 gen9_avc_kernel_init(ctx, encoder_context);
11277 encoder_context->vme_context = vme_context;
11278 /* Handling PreEnc operations separately since it gives better
11279 * code readability, avoid possible vme operations mess-up */
11280 encoder_context->vme_pipeline =
11281 !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11282 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* Cleanup for partial allocation failure; the other free() calls were
 * lost in extraction — only this one is visible here. */
11286 allocate_structure_failed:
11291 free(generic_state);
11297 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11299 /* VME & PAK share the same context */
11300 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11305 encoder_context->mfc_context = pak_context;
11306 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11307 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11308 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11309 encoder_context->get_status = gen9_avc_get_coded_status;