2 * Copyright @ 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
/* Hardware/driver tuning constants for the gen9 AVC encoder GPE setup. */
54 #define MAX_URB_SIZE 4096 /* In register */
55 #define NUM_KERNELS_PER_GPE_CONTEXT 1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4 /* 4 means 16 = (1 << 4) */
/*
 * Emit a buffer address into the batch: a 64-bit relocation when bo is
 * non-NULL, zero DWs otherwise.  OUT_BUFFER_3DW additionally appends the
 * memory attribute DW.  NOTE(review): this chunk is missing several macro
 * continuation lines (the NULL branch and the do/while closers), so the
 * expansions shown here are partial.
 */
59 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
61 OUT_BCS_RELOC64(batch, \
63 I915_GEM_DOMAIN_INSTRUCTION, \
64 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
67 OUT_BCS_BATCH(batch, 0); \
68 OUT_BCS_BATCH(batch, 0); \
72 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
73 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
74 OUT_BCS_BATCH(batch, attr); \
/* Flat quantizer matrix: every byte is 16 (0x10), i.e. the "no scaling"
 * default QM for AVC. */
77 static const uint32_t qm_flat[16] = {
78 0x10101010, 0x10101010, 0x10101010, 0x10101010,
79 0x10101010, 0x10101010, 0x10101010, 0x10101010,
80 0x10101010, 0x10101010, 0x10101010, 0x10101010,
81 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quantizer matrix: each 16-bit entry is 0x1000 (presumably
 * the fixed-point encoding of the flat scale — TODO confirm format). */
84 static const uint32_t fqm_flat[32] = {
85 0x10001000, 0x10001000, 0x10001000, 0x10001000,
86 0x10001000, 0x10001000, 0x10001000, 0x10001000,
87 0x10001000, 0x10001000, 0x10001000, 0x10001000,
88 0x10001000, 0x10001000, 0x10001000, 0x10001000,
89 0x10001000, 0x10001000, 0x10001000, 0x10001000,
90 0x10001000, 0x10001000, 0x10001000, 0x10001000,
91 0x10001000, 0x10001000, 0x10001000, 0x10001000,
92 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Maps slice type to a kernel/curbe index; presumably indexed by the
 * driver's I/P/B slice-type encoding — verify against callers. */
95 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
/* Default curbe payloads for the BRC init/reset and frame-update kernels
 * (initializer bodies elided from this chunk). */
97 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
254 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
/*
 * Refresh the BRC-related fields of generic_state from the rate-control
 * parameters cached on encoder_context (bit rates, frame rate, HRD buffer,
 * ROI list), and flag a BRC reset whenever the effective target changes.
 * NOTE(review): interior lines of this function are elided in this chunk;
 * comments describe only the visible statements.
 */
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413 struct encode_state *encode_state,
414 struct intel_encoder_context *encoder_context)
416 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* bits-per-second -> kbps, rounding up. */
421 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
423 generic_state->brc_need_reset = encoder_context->brc.need_reset;
/* CBR: min == target == max; request a BRC reset if the target moved. */
425 if (generic_state->internal_rate_mode == VA_RC_CBR) {
426 generic_state->min_bit_rate = generic_state->max_bit_rate;
427 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
429 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430 generic_state->target_bit_rate = generic_state->max_bit_rate;
431 generic_state->brc_need_reset = 1;
/* VBR: target is target_percentage of max; min mirrors the same margin
 * below the target (2*pct - 100). */
433 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
434 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
437 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439 generic_state->brc_need_reset = 1;
/* Frame-rate derived values; the elided else-branch defaults to 30 fps. */
444 if (generic_state->internal_rate_mode != VA_RC_CQP) {
445 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
449 generic_state->frames_per_100s = 30 * 100;
450 generic_state->frame_rate = 30 ;
451 generic_state->frames_per_window_size = 30;
/* HRD buffer parameters apply only to rate-controlled (non-CQP) modes. */
455 if (generic_state->internal_rate_mode != VA_RC_CQP) {
456 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
/* Cache at most 3 ROI rectangles; coordinates are converted from pixels
 * to macroblock units by the /16 divisions below. */
461 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462 if (generic_state->num_roi > 0) {
463 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
466 for (i = 0; i < generic_state->num_roi; i++) {
467 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
468 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
469 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
470 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
473 generic_state->roi[i].left /= 16;
474 generic_state->roi[i].right /= 16;
475 generic_state->roi[i].top /= 16;
476 generic_state->roi[i].bottom /= 16;
/*
 * Locate one kernel inside the packed encoder binary: select the header
 * group for `operation`, step `krnstate_idx` entries into it, and fill
 * ret_kernel->bin/size.  kernel_start_pointer values are in 64-byte units
 * (hence the << 6); the size of the last kernel is bounded by binary_size.
 */
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
485 INTEL_GENERIC_ENC_OPERATION operation,
487 struct i965_kernel *ret_kernel)
489 typedef uint32_t BIN_PTR[4];
492 gen9_avc_encoder_kernel_header *pkh_table;
493 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
496 if (!pvbinary || !ret_kernel)
499 bin_start = (char *)pvbinary;
500 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
/* One past the last header entry: end-of-table sentinel for sizing. */
501 pinvalid_entry = &(pkh_table->static_detection) + 1;
502 next_krnoffset = binary_size;
504 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505 pcurr_header = &pkh_table->ply_dscale_ply;
506 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507 pcurr_header = &pkh_table->ply_2xdscale_ply;
508 } else if (operation == INTEL_GENERIC_ENC_ME) {
509 pcurr_header = &pkh_table->me_p;
510 } else if (operation == INTEL_GENERIC_ENC_BRC) {
511 pcurr_header = &pkh_table->frame_brc_init;
512 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513 pcurr_header = &pkh_table->mbenc_quality_I;
514 } else if (operation == INTEL_GENERIC_ENC_WP) {
515 pcurr_header = &pkh_table->wp;
516 } else if (operation == INTEL_GENERIC_ENC_SFD) {
517 pcurr_header = &pkh_table->static_detection;
/* Index within the selected group (e.g. quality/normal/performance). */
522 pcurr_header += krnstate_idx;
523 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
/* Size = distance to the next kernel's start (or to end of binary). */
525 pnext_header = (pcurr_header + 1);
526 if (pnext_header < pinvalid_entry) {
527 next_krnoffset = pnext_header->kernel_start_pointer << 6;
529 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/*
 * free_private_data hook for the per-surface AVC state: destroys the
 * 4x/16x/32x downscaled VA surfaces, releases the per-surface GPE buffers,
 * and drops the direct-MV top/bottom bo references.
 */
534 gen9_free_surfaces_avc(void **data)
536 struct gen9_surface_avc *avc_surface;
543 if (avc_surface->scaled_4x_surface_obj) {
544 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546 avc_surface->scaled_4x_surface_obj = NULL;
549 if (avc_surface->scaled_16x_surface_obj) {
550 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552 avc_surface->scaled_16x_surface_obj = NULL;
555 if (avc_surface->scaled_32x_surface_obj) {
556 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558 avc_surface->scaled_32x_surface_obj = NULL;
561 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
/* dri_bo_unreference(NULL) is a no-op; pointers are cleared to guard
 * against double-free on repeated invocation. */
565 dri_bo_unreference(avc_surface->dmv_top);
566 avc_surface->dmv_top = NULL;
567 dri_bo_unreference(avc_surface->dmv_bottom);
568 avc_surface->dmv_bottom = NULL;
/*
 * Lazily attach per-surface AVC encoder state to obj_surface: downscaled
 * 4x/16x (and optionally 32x) NV12 surfaces for HME, MB-code and MV-data
 * buffers, an optional ref-pic-select surface, and direct-MV bos.  Returns
 * VA_STATUS_SUCCESS immediately if private data already exists.
 * NOTE(review): several lines (frees on the early-error paths, size
 * computations) are elided from this chunk.
 */
578 gen9_avc_init_check_surfaces(VADriverContextP ctx,
579 struct object_surface *obj_surface,
580 struct intel_encoder_context *encoder_context,
581 struct avc_surface_param *surface_param)
583 struct i965_driver_data *i965 = i965_driver_data(ctx);
584 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
585 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
586 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
588 struct gen9_surface_avc *avc_surface;
589 int downscaled_width_4x, downscaled_height_4x;
590 int downscaled_width_16x, downscaled_height_16x;
591 int downscaled_width_32x, downscaled_height_32x;
593 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
594 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
595 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
596 int allocate_flag = 1;
599 if (!obj_surface || !obj_surface->bo)
600 return VA_STATUS_ERROR_INVALID_SURFACE;
/* Already initialized for this surface: nothing to do. */
602 if (obj_surface->private_data) {
603 return VA_STATUS_SUCCESS;
606 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
609 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Ownership: the surface frees this state via gen9_free_surfaces_avc. */
611 avc_surface->ctx = ctx;
612 obj_surface->private_data = avc_surface;
613 obj_surface->free_private_data = gen9_free_surfaces_avc;
/* 4x-downscaled surface for HME. */
615 downscaled_width_4x = generic_state->frame_width_4x;
616 downscaled_height_4x = generic_state->frame_height_4x;
618 i965_CreateSurfaces(ctx,
620 downscaled_height_4x,
623 &avc_surface->scaled_4x_surface_id);
625 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
627 if (!avc_surface->scaled_4x_surface_obj) {
628 return VA_STATUS_ERROR_ALLOCATION_FAILED;
631 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
632 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 16x-downscaled surface for HME. */
634 downscaled_width_16x = generic_state->frame_width_16x;
635 downscaled_height_16x = generic_state->frame_height_16x;
636 i965_CreateSurfaces(ctx,
637 downscaled_width_16x,
638 downscaled_height_16x,
641 &avc_surface->scaled_16x_surface_id);
642 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
644 if (!avc_surface->scaled_16x_surface_obj) {
645 return VA_STATUS_ERROR_ALLOCATION_FAILED;
648 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
649 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 32x-downscaled surface only when 32x ME is supported/enabled. */
651 if (generic_state->b32xme_supported ||
652 generic_state->b32xme_enabled) {
653 downscaled_width_32x = generic_state->frame_width_32x;
654 downscaled_height_32x = generic_state->frame_height_32x;
655 i965_CreateSurfaces(ctx,
656 downscaled_width_32x,
657 downscaled_height_32x,
660 &avc_surface->scaled_32x_surface_id);
661 avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
663 if (!avc_surface->scaled_32x_surface_obj) {
664 return VA_STATUS_ERROR_ALLOCATION_FAILED;
667 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
668 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
671 /*mb code and mv data for each frame*/
/* 16 DWs of MB code per macroblock. */
672 size = frame_mb_nums * 16 * 4;
673 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
674 &avc_surface->res_mb_code_surface,
678 goto failed_allocation;
/* 32 DWs of MV data per macroblock. */
680 size = frame_mb_nums * 32 * 4;
681 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
682 &avc_surface->res_mv_data_surface,
686 goto failed_allocation;
/* Optional ref-pic-select list: 8 bytes per MB column, row-aligned to 64. */
689 if (avc_state->ref_pic_select_list_supported) {
690 width = ALIGN(frame_width_in_mbs * 8, 64);
691 height = frame_height_in_mbs ;
692 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
693 &avc_surface->res_ref_pic_select_surface,
696 "Ref pic select list buffer");
698 goto failed_allocation;
/* Direct-MV buffers for top/bottom fields (size args elided here). */
702 avc_surface->dmv_top =
703 dri_bo_alloc(i965->intel.bufmgr,
704 "direct mv top Buffer",
707 avc_surface->dmv_bottom =
708 dri_bo_alloc(i965->intel.bufmgr,
709 "direct mv bottom Buffer",
712 assert(avc_surface->dmv_top);
713 assert(avc_surface->dmv_bottom);
715 return VA_STATUS_SUCCESS;
718 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Build the MBEnc slice-map surface: one row per MB row, each entry holding
 * the slice id of the corresponding macroblock, terminated by 0xFFFFFFFF.
 * Only needed when slices contain an arbitrary number of MBs; otherwise
 * returns early.  NOTE(review): the inner-loop body writing slice ids is
 * elided from this chunk.
 */
722 gen9_avc_generate_slice_map(VADriverContextP ctx,
723 struct encode_state *encode_state,
724 struct intel_encoder_context *encoder_context)
726 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
727 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
728 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
729 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
731 struct i965_gpe_resource *gpe_resource = NULL;
732 VAEncSliceParameterBufferH264 * slice_param = NULL;
733 unsigned int * data = NULL;
734 unsigned int * data_row = NULL;
/* Row pitch in DWs: (mbs-per-row + 1) entries, byte-aligned to 64. */
736 unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
738 if (!avc_state->arbitrary_num_mbs_in_slice)
741 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
742 assert(gpe_resource);
744 i965_zero_gpe_resource(gpe_resource);
746 data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
750 for (i = 0; i < avc_state->slice_num; i++) {
751 slice_param = avc_state->slice_param[i];
752 for (j = 0; j < slice_param->num_macroblocks; j++) {
/* Advance to the next map row at each frame-width boundary. */
754 if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
/* End-of-map sentinel. */
762 *data++ = 0xFFFFFFFF;
764 i965_unmap_gpe_resource(gpe_resource);
/*
 * One-stop allocation of every GPE surface/buffer the gen9 AVC encoder
 * uses: the 2nd-level image-state batch, scaling/ME outputs, BRC state
 * (allocated once, guarded by generic_state->brc_allocated), MB QP, slice
 * map, SFD cost tables, weighted-prediction surfaces and the MAD buffer.
 * Frees any previous instance of each resource before re-allocating, so it
 * is safe to call on reconfiguration.  Returns VA_STATUS_SUCCESS or
 * VA_STATUS_ERROR_ALLOCATION_FAILED via the failed_allocation label.
 * NOTE(review): many `if (!allocate_flag)` guard lines and some size
 * computations are elided from this chunk.
 */
768 gen9_avc_allocate_resources(VADriverContextP ctx,
769 struct encode_state *encode_state,
770 struct intel_encoder_context *encoder_context)
772 struct i965_driver_data *i965 = i965_driver_data(ctx);
773 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
774 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
775 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
777 unsigned int size = 0;
778 unsigned int width = 0;
779 unsigned int height = 0;
780 unsigned char * data = NULL;
781 int allocate_flag = 1;
784 /*all the surface/buffer are allocated here*/
786 /*second level batch buffer for image state write when cqp etc*/
787 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
788 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
789 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
790 &avc_ctx->res_image_state_batch_buffer_2nd_level,
/* NOTE(review): "levle" typo in this debug name string — runtime string,
 * intentionally left unchanged here. */
792 "second levle batch (image state write) buffer");
794 goto failed_allocation;
796 /* scaling related surface */
797 if (avc_state->mb_status_supported) {
798 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
/* 16 DWs per MB, rounded up to a 1KB multiple. */
799 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
800 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
801 &avc_ctx->res_mb_status_buffer,
803 "MB statistics output buffer");
805 goto failed_allocation;
806 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
809 if (avc_state->flatness_check_supported) {
810 width = generic_state->frame_width_in_mbs * 4;
811 height = generic_state->frame_height_in_mbs * 4;
812 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
813 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
814 &avc_ctx->res_flatness_check_surface,
817 "Flatness check buffer");
819 goto failed_allocation;
821 /* me related surface */
822 width = generic_state->downscaled_width_4x_in_mb * 8;
823 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
824 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
825 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
826 &avc_ctx->s4x_memv_distortion_buffer,
829 "4x MEMV distortion buffer");
831 goto failed_allocation;
832 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
834 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
835 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
836 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
837 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
838 &avc_ctx->s4x_memv_min_distortion_brc_buffer,
841 "4x MEMV min distortion brc buffer");
843 goto failed_allocation;
844 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
847 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
848 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
849 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
850 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
851 &avc_ctx->s4x_memv_data_buffer,
854 "4x MEMV data buffer");
856 goto failed_allocation;
857 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
860 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
861 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
862 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
863 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
864 &avc_ctx->s16x_memv_data_buffer,
867 "16x MEMV data buffer");
869 goto failed_allocation;
870 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
873 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
874 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
875 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
876 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
877 &avc_ctx->s32x_memv_data_buffer,
880 "32x MEMV data buffer");
882 goto failed_allocation;
883 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
/* BRC resources are persistent across frames: allocate only once. */
886 if (!generic_state->brc_allocated) {
887 /*brc related surface */
888 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
890 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891 &avc_ctx->res_brc_history_buffer,
893 "brc history buffer");
895 goto failed_allocation;
897 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
899 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
900 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
902 "brc pak statistic buffer");
904 goto failed_allocation;
906 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
/* 7 image-state command copies (presumably one per BRC pass — confirm). */
907 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
908 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
909 &avc_ctx->res_brc_image_state_read_buffer,
911 "brc image state read buffer");
913 goto failed_allocation;
915 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
916 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
917 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
918 &avc_ctx->res_brc_image_state_write_buffer,
920 "brc image state write buffer");
922 goto failed_allocation;
924 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
925 height = avc_state->brc_const_data_surface_height;
926 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
927 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
928 &avc_ctx->res_brc_const_data_buffer,
931 "brc const data buffer");
933 goto failed_allocation;
935 if (generic_state->brc_distortion_buffer_supported) {
/* NOTE(review): first width/height pair is immediately overwritten by
 * the second — dead stores kept byte-identical here. */
936 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
937 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
938 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
939 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
940 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
941 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
942 &avc_ctx->res_brc_dist_data_surface,
945 "brc dist data buffer");
947 goto failed_allocation;
948 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
951 if (generic_state->brc_roi_enable) {
952 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
953 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
954 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
955 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
956 &avc_ctx->res_mbbrc_roi_surface,
961 goto failed_allocation;
962 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
966 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
967 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
968 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
969 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
970 &avc_ctx->res_mbbrc_mb_qp_data_surface,
973 "mbbrc mb qp buffer");
975 goto failed_allocation;
977 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
/* 16 DWs per QP value across the full AVC QP range. */
978 size = 16 * AVC_QP_MAX * 4;
979 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
980 &avc_ctx->res_mbbrc_const_data_buffer,
982 "mbbrc const data buffer");
984 goto failed_allocation;
986 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
987 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
988 size = avc_state->mbenc_brc_buffer_size;
989 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
990 &avc_ctx->res_mbenc_brc_buffer,
994 goto failed_allocation;
995 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
997 generic_state->brc_allocated = 1;
/* Externally supplied per-MB QP surface. */
1001 if (avc_state->mb_qp_data_enable) {
1002 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1003 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1004 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1005 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1006 &avc_ctx->res_mb_qp_data_surface,
1009 "external mb qp buffer");
1011 goto failed_allocation;
1014 /* mbenc related surface. it share most of surface with other kernels */
1015 if (avc_state->arbitrary_num_mbs_in_slice) {
1016 width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1017 height = generic_state->frame_height_in_mbs ;
1018 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1019 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1020 &avc_ctx->res_mbenc_slice_map_surface,
1023 "slice map buffer");
1025 goto failed_allocation;
1026 i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1028 /*generate slice map,default one slice per frame.*/
1031 /* sfd related surface */
1032 if (avc_state->sfd_enable) {
1033 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1035 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1036 &avc_ctx->res_sfd_output_buffer,
1038 "sfd output buffer");
1040 goto failed_allocation;
/* Static cost tables (52 QP entries) are uploaded at allocation time. */
1042 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1043 size = ALIGN(52, 64);
1044 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1045 &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1047 "sfd P frame cost table buffer");
1049 goto failed_allocation;
1050 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1052 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1053 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1055 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1056 size = ALIGN(52, 64);
1057 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1058 &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1060 "sfd B frame cost table buffer");
1062 goto failed_allocation;
1063 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1065 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1066 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1069 /* wp related surfaces */
1070 if (avc_state->weighted_prediction_supported) {
/* Two full-resolution NV12 output candidates for weighted prediction. */
1071 for (i = 0; i < 2 ; i++) {
1072 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1076 width = generic_state->frame_width_in_pixel;
1077 height = generic_state->frame_height_in_pixel ;
1078 i965_CreateSurfaces(ctx,
1081 VA_RT_FORMAT_YUV420,
1083 &avc_ctx->wp_output_pic_select_surface_id[i]);
1084 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1086 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1087 goto failed_allocation;
1090 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1091 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* Wrap the VA surfaces as 2D GPE resources for kernel binding. */
1093 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1094 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1095 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1096 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1101 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1103 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1104 &avc_ctx->res_mad_data_buffer,
1105 ALIGN(size, 0x1000),
1108 goto failed_allocation;
1110 return VA_STATUS_SUCCESS;
1113 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Mirror of gen9_avc_allocate_resources: releases every GPE buffer/surface
 * owned by the AVC encoder context and destroys the two weighted-prediction
 * VA surfaces.  i965_free_gpe_resource() is safe on never-allocated entries.
 */
1117 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1122 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1123 VADriverContextP ctx = avc_ctx->ctx;
1126 /* free all the surface/buffer here*/
1127 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1128 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1129 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1130 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1131 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1132 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1133 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1134 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1135 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1136 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1137 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1138 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1139 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1141 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1142 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1143 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1144 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1145 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1146 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1147 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1148 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1149 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1150 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1151 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1152 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
/* Destroy WP VA surfaces and clear the ids so a later free is a no-op. */
1154 for (i = 0; i < 2 ; i++) {
1155 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1156 i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1157 avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1158 avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
/*
 * Submit one GPE kernel as a single MEDIA_OBJECT: records media_function
 * into the status buffer (for async status queries), sets up the media
 * pipeline, emits the media object, flushes media state, and submits the
 * batch atomically.
 */
1165 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1166 struct intel_encoder_context *encoder_context,
1167 struct i965_gpe_context *gpe_context,
1169 struct gpe_media_object_parameter *param)
1171 struct i965_driver_data *i965 = i965_driver_data(ctx);
1172 struct i965_gpe_table *gpe = &i965->gpe_table;
1173 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1174 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1176 struct intel_batchbuffer *batch = encoder_context->base.batch;
1177 struct encoder_status_buffer_internal *status_buffer;
1178 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1183 intel_batchbuffer_start_atomic(batch, 0x1000);
1184 intel_batchbuffer_emit_mi_flush(batch);
/* Tag the status buffer with the kernel id currently executing. */
1186 status_buffer = &(avc_ctx->status_buffer);
1187 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1188 mi_store_data_imm.bo = status_buffer->bo;
1189 mi_store_data_imm.offset = status_buffer->media_index_offset;
1190 mi_store_data_imm.dw0 = media_function;
1191 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1193 gpe->pipeline_setup(ctx, gpe_context, batch);
1194 gpe->media_object(ctx, gpe_context, batch, param);
1195 gpe->media_state_flush(ctx, gpe_context, batch);
1197 gpe->pipeline_end(ctx, gpe_context, batch);
1199 intel_batchbuffer_end_atomic(batch);
1201 intel_batchbuffer_flush(batch);
/*
 * Same flow as gen9_avc_run_kernel_media_object, but dispatches the kernel
 * with a MEDIA_OBJECT_WALKER (one thread per walker unit) instead of a
 * single media object.
 */
1205 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1206 struct intel_encoder_context *encoder_context,
1207 struct i965_gpe_context *gpe_context,
1209 struct gpe_media_object_walker_parameter *param)
1211 struct i965_driver_data *i965 = i965_driver_data(ctx);
1212 struct i965_gpe_table *gpe = &i965->gpe_table;
1213 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1214 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1216 struct intel_batchbuffer *batch = encoder_context->base.batch;
1217 struct encoder_status_buffer_internal *status_buffer;
1218 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1223 intel_batchbuffer_start_atomic(batch, 0x1000);
1225 intel_batchbuffer_emit_mi_flush(batch);
/* Tag the status buffer with the kernel id currently executing. */
1227 status_buffer = &(avc_ctx->status_buffer);
1228 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1229 mi_store_data_imm.bo = status_buffer->bo;
1230 mi_store_data_imm.offset = status_buffer->media_index_offset;
1231 mi_store_data_imm.dw0 = media_function;
1232 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1234 gpe->pipeline_setup(ctx, gpe_context, batch);
1235 gpe->media_object_walker(ctx, gpe_context, batch, param);
1236 gpe->media_state_flush(ctx, gpe_context, batch);
1238 gpe->pipeline_end(ctx, gpe_context, batch);
1240 intel_batchbuffer_end_atomic(batch);
1242 intel_batchbuffer_flush(batch);
/*
 * Size the GPE context (curbe, sampler, IDRT, binding table, VFE state)
 * for one AVC encoder kernel, from the kernel's declared curbe/sampler/
 * inline-data sizes.  URB accounting is in 32-byte registers.
 */
1246 gen9_init_gpe_context_avc(VADriverContextP ctx,
1247 struct i965_gpe_context *gpe_context,
1248 struct encoder_kernel_parameter *kernel_param)
1250 struct i965_driver_data *i965 = i965_driver_data(ctx);
1252 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1254 gpe_context->sampler.entry_size = 0;
1255 gpe_context->sampler.max_entries = 0;
1257 if (kernel_param->sampler_size) {
1258 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1259 gpe_context->sampler.max_entries = 1;
1262 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1263 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
/* Binding table (4 bytes/entry) followed by padded surface states. */
1265 gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1266 gpe_context->surface_state_binding_table.binding_table_offset = 0;
1267 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1268 gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
/* Scale thread count with the detected EU total; fall back to a 16-EU
 * estimate when the topology query gave nothing. */
1270 if (i965->intel.eu_total > 0)
1271 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1273 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1275 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1276 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
/* URB entries = whatever remains after curbe + IDRT, per entry size. */
1277 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1278 gpe_context->vfe_state.curbe_allocation_size -
1279 ((gpe_context->idrt.entry_size >> 5) *
1280 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
/* NOTE(review): assumes the driver's CLAMP(min, max, value) argument
 * order, i.e. clamp to [1, 127] — confirm against the macro definition. */
1281 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1282 gpe_context->vfe_state.gpgpu_mode = 0;
1286 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1287 struct encoder_scoreboard_parameter *scoreboard_param)
1289 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1290 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1291 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1293 if (scoreboard_param->walkpat_flag) {
1294 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1295 gpe_context->vfe_desc5.scoreboard0.type = 1;
1297 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1298 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1300 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1301 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1303 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1304 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1306 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1307 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1310 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1311 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1314 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1315 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1318 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1319 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1322 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1323 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1326 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1327 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1330 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1331 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1334 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1335 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1338 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1339 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1343 VME pipeline related function
1347 scaling kernel related function
1350 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1351 struct encode_state *encode_state,
1352 struct i965_gpe_context *gpe_context,
1353 struct intel_encoder_context *encoder_context,
1356 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1357 struct scaling_param *surface_param = (struct scaling_param *)param;
1359 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1364 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1366 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1367 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1369 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1370 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1373 curbe_cmd->dw5.flatness_threshold = 128;
1374 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1375 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1376 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1378 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1379 curbe_cmd->dw7.enable_mb_variance_output ||
1380 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1381 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1384 i965_gpe_context_unmap_curbe(gpe_context);
1389 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1390 struct encode_state *encode_state,
1391 struct i965_gpe_context *gpe_context,
1392 struct intel_encoder_context *encoder_context,
1395 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1396 struct scaling_param *surface_param = (struct scaling_param *)param;
1398 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1403 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1405 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1406 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1408 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1409 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1411 if (surface_param->enable_mb_flatness_check)
1412 curbe_cmd->dw5.flatness_threshold = 128;
1413 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1414 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1415 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1416 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1418 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1419 curbe_cmd->dw6.enable_mb_variance_output ||
1420 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1421 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1424 i965_gpe_context_unmap_curbe(gpe_context);
1429 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1430 struct encode_state *encode_state,
1431 struct i965_gpe_context *gpe_context,
1432 struct intel_encoder_context *encoder_context,
1435 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1436 struct scaling_param *surface_param = (struct scaling_param *)param;
1438 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1443 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1445 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1446 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1448 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1449 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1451 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces for one scaling-kernel dispatch:
 *  - the input Y plane and the downscaled output Y plane as 2D surfaces
 *    (output format depends on which surface format flag is set), and
 *  - either the per-MB statistics buffer or the flatness-check surface,
 *    whichever statistic output is enabled; both use the same BTI.
 */
gen9_avc_send_surface_scaling(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
    struct scaling_param *surface_param = (struct scaling_param *)param;
    unsigned int surface_format;
    unsigned int res_size;

    /* Pick the output surface format requested by the caller. */
    if (surface_param->scaling_out_use_32unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R32_UNORM;
    else if (surface_param->scaling_out_use_16unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R16_UNORM;
        surface_format = I965_SURFACEFORMAT_R8_UNORM;

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->input_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->output_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);

    /*add buffer mv_proc_stat, here need change*/
    if (surface_param->mbv_proc_stat_enabled) {
        /* 16 DWs of statistics per 16x16 macroblock of the input frame. */
        res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);

        gen9_add_buffer_gpe_surface(ctx,
                                    surface_param->pres_mbv_proc_stat_buffer,
                                    GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
    } else if (surface_param->enable_mb_flatness_check) {
        /* Flatness output goes to the same BTI, but as a 2D R8 surface. */
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       surface_param->pres_flatness_check_surface,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
/*
 * Run one HME downscaling pass.  The HME level selects source/destination
 * surfaces and the kernel: 4x and 16x both use the 4x scaling kernel
 * (16x is the 4x kernel applied to the already 4x-scaled surface), while
 * 32x uses the 2x kernel on the 16x surface.  The routine fills the
 * CURBE, binds the surfaces and dispatches via a media-object walker.
 */
gen9_avc_kernel_scaling(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    struct i965_gpe_context *gpe_context;
    struct scaling_param surface_param;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;

    /* The scaled surfaces hang off the reconstructed surface's private data. */
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;

    memset(&surface_param, 0, sizeof(struct scaling_param));
    /* Select kernel, surfaces and statistic outputs per HME level. */
    case INTEL_ENC_HME_4x : {
        media_function = INTEL_MEDIA_STATE_4X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;

        surface_param.input_surface = encode_state->input_yuv_object;
        surface_param.input_frame_width = generic_state->frame_width_in_pixel;
        surface_param.input_frame_height = generic_state->frame_height_in_pixel;

        surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj;
        surface_param.output_frame_width = generic_state->frame_width_4x;
        surface_param.output_frame_height = generic_state->frame_height_4x;

        /* Only the 4x pass can produce flatness/MB statistics. */
        surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
        surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
        surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;

        surface_param.blk8x8_stat_enabled = 0;
        surface_param.use_4x_scaling = 1;
        surface_param.use_16x_scaling = 0;
        surface_param.use_32x_scaling = 0;

    case INTEL_ENC_HME_16x : {
        /* 16x reuses the 4x kernel on the already 4x-downscaled surface. */
        media_function = INTEL_MEDIA_STATE_16X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj;
        surface_param.input_frame_width = generic_state->frame_width_4x;
        surface_param.input_frame_height = generic_state->frame_height_4x;

        surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj;
        surface_param.output_frame_width = generic_state->frame_width_16x;
        surface_param.output_frame_height = generic_state->frame_height_16x;

        surface_param.enable_mb_flatness_check = 0;
        surface_param.enable_mb_variance_output = 0;
        surface_param.enable_mb_pixel_average_output = 0;

        surface_param.blk8x8_stat_enabled = 0;
        surface_param.use_4x_scaling = 0;
        surface_param.use_16x_scaling = 1;
        surface_param.use_32x_scaling = 0;

    case INTEL_ENC_HME_32x : {
        /* 32x runs the 2x kernel on the 16x-downscaled surface. */
        media_function = INTEL_MEDIA_STATE_32X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj;
        surface_param.input_frame_width = generic_state->frame_width_16x;
        surface_param.input_frame_height = generic_state->frame_height_16x;

        surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj;
        surface_param.output_frame_width = generic_state->frame_width_32x;
        surface_param.output_frame_height = generic_state->frame_height_32x;

        surface_param.enable_mb_flatness_check = 0;
        surface_param.enable_mb_variance_output = 0;
        surface_param.enable_mb_pixel_average_output = 0;

        surface_param.blk8x8_stat_enabled = 0;
        surface_param.use_4x_scaling = 0;
        surface_param.use_16x_scaling = 0;
        surface_param.use_32x_scaling = 1;

    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    /* 32x uses the 2x kernel's CURBE layout; 4x/16x share the 4x layout. */
    if (surface_param.use_32x_scaling) {
        generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
        generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* Output surface format differs between the 2x and 4x kernels. */
    if (surface_param.use_32x_scaling) {
        surface_param.scaling_out_use_16unorm_surf_fmt = 1;
        surface_param.scaling_out_use_32unorm_surf_fmt = 0;
        surface_param.scaling_out_use_16unorm_surf_fmt = 0;
        surface_param.scaling_out_use_32unorm_surf_fmt = 1;

    /*
     * Statistics are only produced on the 4x pass.  When the HW MB status
     * buffer is supported it collects everything (including flatness);
     * otherwise a dedicated flatness-check surface is used.
     */
    if (surface_param.use_4x_scaling) {
        if (avc_state->mb_status_supported) {
            surface_param.enable_mb_flatness_check = 0;
            surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0;
            surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
            surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
            surface_param.mbv_proc_stat_enabled = 0;
            surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);

    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* setup the interface data */
    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    if (surface_param.use_32x_scaling) {
        kernel_walker_param.resolution_x = downscaled_width_in_mb;
        kernel_walker_param.resolution_y = downscaled_height_in_mb;
        /* the scaling is based on 8x8 blk level */
        kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
        kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    /* Scaling threads are independent; no scoreboard needed. */
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
1671 frame/mb brc related function
1674 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1675 struct encode_state *encode_state,
1676 struct intel_encoder_context *encoder_context,
1677 struct gen9_mfx_avc_img_state *pstate)
1679 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1680 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1681 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1683 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1684 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1686 memset(pstate, 0, sizeof(*pstate));
1688 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1689 pstate->dw0.sub_opcode_b = 0;
1690 pstate->dw0.sub_opcode_a = 0;
1691 pstate->dw0.command_opcode = 1;
1692 pstate->dw0.pipeline = 2;
1693 pstate->dw0.command_type = 3;
1695 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1697 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1698 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
1700 pstate->dw3.image_structure = 0;//frame is zero
1701 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1702 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1703 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1704 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1705 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1707 pstate->dw4.field_picture_flag = 0;
1708 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1709 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1710 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1711 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1712 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1713 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1714 pstate->dw4.mb_mv_format_flag = 1;
1715 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1716 pstate->dw4.mv_unpacked_flag = 1;
1717 pstate->dw4.insert_test_flag = 0;
1718 pstate->dw4.load_slice_pointer_flag = 0;
1719 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1720 pstate->dw4.minimum_frame_size = 0;
1721 pstate->dw5.intra_mb_max_bit_flag = 1;
1722 pstate->dw5.inter_mb_max_bit_flag = 1;
1723 pstate->dw5.frame_size_over_flag = 1;
1724 pstate->dw5.frame_size_under_flag = 1;
1725 pstate->dw5.intra_mb_ipcm_flag = 1;
1726 pstate->dw5.mb_rate_ctrl_flag = 0;
1727 pstate->dw5.non_first_pass_flag = 0;
1728 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1729 pstate->dw5.aq_chroma_disable = 1;
1730 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1731 pstate->dw5.aq_enable = avc_state->tq_enable;
1732 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1734 pstate->dw5.aq_rounding = 0;
1737 pstate->dw6.intra_mb_max_size = 2700;
1738 pstate->dw6.inter_mb_max_size = 4095;
1740 pstate->dw8.slice_delta_qp_max0 = 0;
1741 pstate->dw8.slice_delta_qp_max1 = 0;
1742 pstate->dw8.slice_delta_qp_max2 = 0;
1743 pstate->dw8.slice_delta_qp_max3 = 0;
1745 pstate->dw9.slice_delta_qp_min0 = 0;
1746 pstate->dw9.slice_delta_qp_min1 = 0;
1747 pstate->dw9.slice_delta_qp_min2 = 0;
1748 pstate->dw9.slice_delta_qp_min3 = 0;
1750 pstate->dw10.frame_bitrate_min = 0;
1751 pstate->dw10.frame_bitrate_min_unit = 1;
1752 pstate->dw10.frame_bitrate_min_unit_mode = 1;
1753 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1754 pstate->dw10.frame_bitrate_max_unit = 1;
1755 pstate->dw10.frame_bitrate_max_unit_mode = 1;
1757 pstate->dw11.frame_bitrate_min_delta = 0;
1758 pstate->dw11.frame_bitrate_max_delta = 0;
1760 pstate->dw12.vad_error_logic = 1;
1761 /* set paramters DW19/DW20 for slices */
1764 void gen9_avc_set_image_state(VADriverContextP ctx,
1765 struct encode_state *encode_state,
1766 struct intel_encoder_context *encoder_context,
1767 struct i965_gpe_resource *gpe_resource)
1769 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1770 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1773 unsigned int * data;
1774 struct gen9_mfx_avc_img_state cmd;
1776 pdata = i965_map_gpe_resource(gpe_resource);
1781 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1782 for (i = 0; i < generic_state->num_pak_passes; i++) {
1785 cmd.dw4.macroblock_stat_enable = 0;
1786 cmd.dw5.non_first_pass_flag = 0;
1788 cmd.dw4.macroblock_stat_enable = 1;
1789 cmd.dw5.non_first_pass_flag = 1;
1790 cmd.dw5.intra_mb_ipcm_flag = 1;
1793 cmd.dw5.mb_rate_ctrl_flag = 0;
1794 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1795 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1796 *data = MI_BATCH_BUFFER_END;
1798 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1800 i965_unmap_gpe_resource(gpe_resource);
1804 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1805 struct encode_state *encode_state,
1806 struct intel_encoder_context *encoder_context,
1807 struct i965_gpe_resource *gpe_resource)
1809 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1810 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1813 unsigned int * data;
1814 struct gen9_mfx_avc_img_state cmd;
1816 pdata = i965_map_gpe_resource(gpe_resource);
1821 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1823 if (generic_state->curr_pak_pass == 0) {
1824 cmd.dw4.macroblock_stat_enable = 0;
1825 cmd.dw5.non_first_pass_flag = 0;
1828 cmd.dw4.macroblock_stat_enable = 1;
1829 cmd.dw5.non_first_pass_flag = 0;
1830 cmd.dw5.intra_mb_ipcm_flag = 1;
1833 cmd.dw5.mb_rate_ctrl_flag = 0;
1834 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1835 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1836 *data = MI_BATCH_BUFFER_END;
1838 i965_unmap_gpe_resource(gpe_resource);
/*
 * Build the per-QP trellis-quantization lambda LUT for Gen9.5 (KBL/GLK).
 *
 * Each LUT entry packs the intra lambda in the high 16 bits and the inter
 * lambda in the low 16 bits.  The source tables use the sentinel values
 * 0xfffa (intra) and 0xffef (inter) to mean "replace with 0xf000 plus a
 * rounding value"; the inter rounding value depends on slice type,
 * reference usage, preset, and any application-supplied override.
 */
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    /* LUT lives in the encoder state; two 32-bit entries per QP. */
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    /* Seed the LUT from the slice-type-specific source table. */
    switch (generic_state->frame_type) {
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
    /* Patch the sentinel entries in place. */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            /* High half: intra lambda. */
            intra = value >> 16;
            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    /* Sentinel: use the default intra trellis rounding. */
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
            intra = intra << 16;
            /* Low half: inter lambda. */
            inter = value & 0xffff;
            if (inter < GEN95_AVC_MAX_LAMBDA) {
                if (inter == 0xffef) {
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        /* Preset default unless the app supplied a rounding. */
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        /* B frames distinguish referenced vs non-referenced. */
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                                rounding_value = avc_state->rounding_inter_b_ref;
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                                rounding_value = avc_state->rounding_inter_b;
                inter = 0xf000 + rounding_value;
            /* Repack the patched halves. */
            *(lambda_table + i * 2 + col) = intra + inter;
/*
 * Populate the BRC constant-data surface for the current frame type.
 *
 * The surface is a concatenation of fixed tables, copied section by
 * section with the write cursor (`data`) advanced after each one:
 * QP-adjustment/distortion-threshold tables, skip-value thresholds, the
 * reference-list QP map, mode/MV cost tables, ref costs, intra scaling
 * factors and (on Gen9.5) lambda/FTQ tables.  Several entries are then
 * patched from application-supplied LUTs.
 */
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);
    i965_zero_gpe_resource(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    /* Index tables by kernel slice type (I/P/B). */
    table_idx = slice_type_kernel[generic_state->frame_type];
    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
    /* skip threshold table*/
    switch (generic_state->frame_type) {
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        /*SLICE_TYPE_I,no change */
    /* Override the non-FTQ skip thresholds from the app LUT (odd bytes). */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0 map, 32 bytes L1 map (0xff == unused), padding. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            /* Map ref-list entry to driver ref index (original author
               marked this mapping as questionable). */
            *(data + i) = avc_state->list_ref_idx[0][i];
    case SLICE_TYPE_B: {
        /* L1 map sits after the two 32-byte L0/L1 slots. */
        data = data + 32 + 32;
        for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            *(data + i) = avc_state->list_ref_idx[1][i];
        data = data - 32 - 32;
        for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            *(data + i) = avc_state->list_ref_idx[0][i];
        /*SLICE_TYPE_I,no change */
    /*mv cost and mode cost*/
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
    /* Optionally rewrite byte 3 of each entry with the legacy intra cost. */
    if (avc_state->old_mode_cost_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
    /* App-supplied FTQ skip thresholds go into bytes 24-25 and 27-31 of
       each 32-byte entry (byte 26 deliberately untouched). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
    /* Reference cost table for the active slice type. */
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    /* Intra scaling factors: adaptive variant if enabled. */
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    /* Gen9.5-only sections: lambda table and FTQ25 table. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    i965_unmap_gpe_resource(gpe_resource);
/*
 * Legacy variant of gen9_avc_init_brc_const_data(): fills the BRC
 * constant-data surface using the Gen7.5-era tables, and skips the
 * reference-list QP map and the Gen9.5-only sections.
 */
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct i965_gpe_resource *gpe_resource = NULL;
    /* NOTE(review): the sibling gen9_avc_init_brc_const_data() declares
       these as `unsigned char *` and the arithmetic below (data + 1 + i*2,
       data + i*32 + 24) mirrors its byte-offset indexing — confirm that
       `unsigned int *` (which scales offsets by 4) is really intended. */
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);
    i965_zero_gpe_resource(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    /* Index tables by kernel slice type (I/P/B). */
    table_idx = slice_type_kernel[generic_state->frame_type];
    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
    /* skip threshold table*/
    switch (generic_state->frame_type) {
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        /*SLICE_TYPE_I,no change */
    /* Override the non-FTQ skip thresholds from the app LUT. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
    /*fill the qp for ref list*/
    /*mv cost and mode cost*/
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
    /* Optionally rewrite entry offset 3 with the legacy intra mode cost. */
    if (avc_state->old_mode_cost_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
    /* App-supplied FTQ skip thresholds (offset 26 deliberately skipped). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
    /* Reference cost table for the active slice type. */
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    i965_unmap_gpe_resource(gpe_resource);
/*
 * Fill the CURBE (kernel constant buffer) for the BRC init/reset kernel:
 * bitrate, VBV buffer, frame-rate and per-frame-type deviation-threshold
 * parameters derived from generic_state and the H.264 sequence params.
 * NOTE(review): this chunk is a sampled fragment; some original lines
 * (return type, closing braces, a few else-branches) are not visible here.
 */
2172 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2173 struct encode_state *encode_state,
2174 struct i965_gpe_context *gpe_context,
2175 struct intel_encoder_context *encoder_context,
2178 gen9_avc_brc_init_reset_curbe_data *cmd;
2179 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2180 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2181 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2182 double input_bits_per_frame = 0;
2183 double bps_ratio = 0;
2184 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2185 struct avc_param common_param;
/* map the CURBE and start from the static default table */
2187 cmd = i965_gpe_context_map_curbe(gpe_context);
2192 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
/* common_param feeds the profile/level max-frame-size helper below */
2194 memset(&common_param, 0, sizeof(common_param));
2195 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2196 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2197 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2198 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2199 common_param.frames_per_100s = generic_state->frames_per_100s;
2200 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2201 common_param.target_bit_rate = generic_state->target_bit_rate;
2203 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2204 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2205 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
/* generic_state stores rates in kbps; the kernel expects bits/second */
2206 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2207 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2208 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2209 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2210 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2211 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2212 cmd->dw12.no_slices = avc_state->slice_num;
2215 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
/* NOTE(review): the self-assignment below is a no-op apparently kept
 * from the reference code path - confirm before removing */
2216 cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2217 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2218 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2223 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2224 cmd->dw7.frame_rate_d = 100;
/* bit 15 (0x8000) set means MB-level BRC is disabled */
2225 cmd->dw8.brc_flag = 0;
2226 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
/* per-rate-control-mode flags: CBR forces max==avg, VBR widens max,
 * AVBR pins max to avg */
2229 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2231 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2232 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2234 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2236 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2237 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2239 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2241 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2243 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2244 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2247 //igonre icq/vcm/qvbr
2249 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2250 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
/* derive sane VBV defaults when the app left them zero
 * (note: the stray second ';' below is harmless) */
2253 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2255 if (cmd->dw2.buf_size_in_bits == 0) {
2256 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2259 if (cmd->dw1.init_buf_full_in_bits == 0) {
2260 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2262 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2263 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2265 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2266 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
/* AVBR overrides the buffer model entirely */
2270 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2271 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2272 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
/* bps_ratio scales all deviation thresholds; clamped to [0.1, 3.5] */
2276 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2277 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
/* deviation-threshold tables (P/B, VBR, I): negative values describe
 * overshoot bands, positive ones undershoot bands; they are stored into
 * unsigned DW fields as two's complement for the kernel */
2280 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2281 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2282 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2283 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2284 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2285 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2286 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2287 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2288 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2289 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2290 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2291 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2292 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2293 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2294 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2295 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2296 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2297 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2298 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2299 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2300 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2301 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2302 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2303 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2305 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2307 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the two surfaces used by the BRC init/reset kernel to the GPE
 * binding table: the BRC history buffer and the distortion 2D surface.
 * NOTE(review): fragment - return type / braces of the original are not
 * visible here.
 */
2313 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2314 struct encode_state *encode_state,
2315 struct i965_gpe_context *gpe_context,
2316 struct intel_encoder_context *encoder_context,
2319 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2320 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
/* BRC history buffer: rate-control state carried across frames */
2322 gen9_add_buffer_gpe_surface(ctx,
2324 &avc_ctx->res_brc_history_buffer,
2326 avc_ctx->res_brc_history_buffer.size,
2328 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
/* distortion data, bound as an R8_UNORM 2D surface */
2330 gen9_add_buffer_2d_gpe_surface(ctx,
2332 &avc_ctx->res_brc_dist_data_surface,
2334 I965_SURFACEFORMAT_R8_UNORM,
2335 GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/*
 * Dispatch the BRC init kernel (first frame) or BRC reset kernel
 * (after the first successful init) as a single MEDIA_OBJECT command
 * with zeroed inline data. Returns VA_STATUS_SUCCESS.
 */
2341 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2342 struct encode_state *encode_state,
2343 struct intel_encoder_context *encoder_context)
2345 struct i965_driver_data *i965 = i965_driver_data(ctx);
2346 struct i965_gpe_table *gpe = &i965->gpe_table;
2347 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2348 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2349 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2350 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2352 struct i965_gpe_context *gpe_context;
2353 struct gpe_media_object_parameter media_object_param;
2354 struct gpe_media_object_inline_data media_object_inline_data;
2355 int media_function = 0;
2356 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2358 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
/* once BRC is initialized, later invocations run the RESET variant */
2360 if (generic_state->brc_inited)
2361 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2363 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2365 gpe->context_init(ctx, gpe_context);
2366 gpe->reset_binding_table(ctx, gpe_context);
/* fill CURBE and binding table through the per-platform vtable hooks */
2368 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2370 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2372 gpe->setup_interface_data(ctx, gpe_context);
/* single-thread kernel: one MEDIA_OBJECT, empty (zeroed) inline data */
2374 memset(&media_object_param, 0, sizeof(media_object_param));
2375 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2376 media_object_param.pinline_data = &media_object_inline_data;
2377 media_object_param.inline_size = sizeof(media_object_inline_data);
2379 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2382 &media_object_param);
2384 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness accounting, skip-frame info, min/max QP per frame type,
 * AVBR gain-adjust thresholds and the profile/level frame-size cap.
 * Also advances generic_state->brc_init_current_target_buf_full_in_bits
 * by one frame's worth of input bits (side effect on shared state).
 * NOTE(review): fragment - some original lines (early-return body,
 * braces, case labels) are not visible here. Field names such as
 * skip_frame_enbale / herder_bytes_inserted are existing (typo'd)
 * struct members and must not be renamed here.
 */
2388 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2389 struct encode_state *encode_state,
2390 struct i965_gpe_context *gpe_context,
2391 struct intel_encoder_context *encoder_context,
2394 gen9_avc_frame_brc_update_curbe_data *cmd;
2395 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2396 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2397 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2398 struct object_surface *obj_surface;
2399 struct gen9_surface_avc *avc_priv_surface;
2400 struct avc_param common_param;
2401 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
/* the reconstructed surface carries the per-frame AVC private data */
2403 obj_surface = encode_state->reconstructed_object;
2405 if (!obj_surface || !obj_surface->private_data)
2407 avc_priv_surface = obj_surface->private_data;
2409 cmd = i965_gpe_context_map_curbe(gpe_context);
2414 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
/* wrap the running target-fullness accumulator at one VBV buffer size */
2416 cmd->dw5.target_size_flag = 0 ;
2417 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2419 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2420 cmd->dw5.target_size_flag = 1 ;
/* account skipped frames as if their input bits had been consumed */
2423 if (generic_state->skip_frame_enbale) {
2424 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2425 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2427 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2430 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2431 cmd->dw1.frame_number = generic_state->seq_frame_number ;
2432 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2433 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2434 cmd->dw5.brc_flag = 0 ;
2435 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2437 if (avc_state->multi_pre_enable) {
2438 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2439 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2442 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* clamp QP range per frame type when the app asked for min/max QP */
2443 if (avc_state->min_max_qp_enable) {
2444 switch (generic_state->frame_type) {
2446 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2447 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2450 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2451 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2454 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2455 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
/* zero means "no explicit clamp" for the kernel */
2459 cmd->dw6.minimum_qp = 0 ;
2460 cmd->dw6.maximum_qp = 0 ;
2462 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2463 cmd->dw6.enable_sliding_window = 0 ;
/* one frame's worth of input bits is added every update */
2465 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR: gain-adjust frame thresholds and rate-ratio bands scaled by
 * the configured convergence/accuracy */
2467 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2468 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2469 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2470 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2471 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2472 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2473 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2474 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2475 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2476 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2477 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2480 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
/* recompute the profile/level frame-size cap for this frame */
2482 memset(&common_param, 0, sizeof(common_param));
2483 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2484 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2485 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2486 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2487 common_param.frames_per_100s = generic_state->frames_per_100s;
2488 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2489 common_param.target_bit_rate = generic_state->target_bit_rate;
2491 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2492 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces for the frame-level BRC update kernel. Binding-table
 * indices differ between gen9 (SKL/BXT) and gen9.5 (KBL/GLK), selected
 * via is_g95. When the platform keeps the MBEnc CURBE in a dedicated
 * buffer (mbenc_brc_buffer_size > 0) that buffer is bound instead of the
 * MBEnc dynamic-state CURBE read/write pair.
 * NOTE(review): fragment - the is_g95 = 0/1 assignment lines and several
 * braces of the original are not visible here.
 */
2498 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2499 struct encode_state *encode_state,
2500 struct i965_gpe_context *gpe_context,
2501 struct intel_encoder_context *encoder_context,
2504 struct i965_driver_data *i965 = i965_driver_data(ctx);
2505 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2506 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2507 struct brc_param * param = (struct brc_param *)param_brc ;
2508 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2509 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2510 unsigned char is_g95 = 0;
/* platform check: SKL/BXT use gen9 indices, KBL/GLK use gen9.5 ones */
2512 if (IS_SKL(i965->intel.device_info) ||
2513 IS_BXT(i965->intel.device_info))
2515 else if (IS_KBL(i965->intel.device_info) ||
2516 IS_GLK(i965->intel.device_info))
2519 /* brc history buffer*/
2520 gen9_add_buffer_gpe_surface(ctx,
2522 &avc_ctx->res_brc_history_buffer,
2524 avc_ctx->res_brc_history_buffer.size,
2526 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2528 /* previous pak buffer*/
2529 gen9_add_buffer_gpe_surface(ctx,
2531 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2533 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2535 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2537 /* image state command buffer read only*/
2538 gen9_add_buffer_gpe_surface(ctx,
2540 &avc_ctx->res_brc_image_state_read_buffer,
2542 avc_ctx->res_brc_image_state_read_buffer.size,
2544 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2546 /* image state command buffer write only*/
2547 gen9_add_buffer_gpe_surface(ctx,
2549 &avc_ctx->res_brc_image_state_write_buffer,
2551 avc_ctx->res_brc_image_state_write_buffer.size,
2553 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
/* gen9.5 path: kernel writes the MBEnc CURBE into a dedicated buffer */
2555 if (avc_state->mbenc_brc_buffer_size > 0) {
2556 gen9_add_buffer_gpe_surface(ctx,
2558 &(avc_ctx->res_mbenc_brc_buffer),
2560 avc_ctx->res_mbenc_brc_buffer.size,
2562 GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2564 /* Mbenc curbe input buffer */
2565 gen9_add_dri_buffer_gpe_surface(ctx,
2567 gpe_context_mbenc->dynamic_state.bo,
2569 ALIGN(gpe_context_mbenc->curbe.length, 64),
2570 gpe_context_mbenc->curbe.offset,
2571 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2572 /* Mbenc curbe output buffer */
2573 gen9_add_dri_buffer_gpe_surface(ctx,
2575 gpe_context_mbenc->dynamic_state.bo,
2577 ALIGN(gpe_context_mbenc->curbe.length, 64),
2578 gpe_context_mbenc->curbe.offset,
2579 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2582 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2583 gen9_add_buffer_2d_gpe_surface(ctx,
2585 &avc_ctx->res_brc_dist_data_surface,
2587 I965_SURFACEFORMAT_R8_UNORM,
2588 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2590 /* BRC const data 2D surface buffer */
2591 gen9_add_buffer_2d_gpe_surface(ctx,
2593 &avc_ctx->res_brc_const_data_buffer,
2595 I965_SURFACEFORMAT_R8_UNORM,
2596 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2598 /* MB statistical data surface*/
2599 gen9_add_buffer_gpe_surface(ctx,
2601 &avc_ctx->res_mb_status_buffer,
2603 avc_ctx->res_mb_status_buffer.size,
2605 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
/*
 * Run the frame-level BRC update kernel. First the MBEnc CURBE is
 * prepared (the BRC kernel reads and patches it), then the BRC frame
 * update CURBE/surfaces are set, the BRC constant data and MFX image
 * state are (re)generated, and the kernel is dispatched as a single
 * MEDIA_OBJECT. Returns VA_STATUS_SUCCESS.
 * NOTE(review): fragment - the kernel_idx declaration, case braces and
 * the P/B kernel_idx adjustments of the original are not visible here.
 */
2611 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
2612 struct encode_state *encode_state,
2613 struct intel_encoder_context *encoder_context)
2616 struct i965_driver_data *i965 = i965_driver_data(ctx);
2617 struct i965_gpe_table *gpe = &i965->gpe_table;
2618 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2619 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2620 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2621 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2622 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2624 struct i965_gpe_context *gpe_context = NULL;
2625 struct gpe_media_object_parameter media_object_param;
2626 struct gpe_media_object_inline_data media_object_inline_data;
2627 int media_function = 0;
2629 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
2630 unsigned int brc_enabled = 0;
2631 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* dirty-ROI is deliberately forced off here via the trailing "&& (0)" */
2632 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
2634 /* the following set the mbenc curbe*/
2635 struct mbenc_param curbe_mbenc_param ;
2636 struct brc_param curbe_brc_param ;
/* decide which optional MBEnc input surfaces the CURBE must describe */
2638 mb_const_data_buffer_in_use =
2639 generic_state->mb_brc_enabled ||
2642 avc_state->mb_qp_data_enable ||
2643 avc_state->rolling_intra_refresh_enable;
2644 mb_qp_buffer_in_use =
2645 generic_state->mb_brc_enabled ||
2646 generic_state->brc_roi_enable ||
2647 avc_state->mb_qp_data_enable;
/* pick the MBEnc kernel family by quality/performance mode */
2649 switch (generic_state->kernel_mode) {
2650 case INTEL_ENC_KERNEL_NORMAL : {
2651 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
2654 case INTEL_ENC_KERNEL_PERFORMANCE : {
2655 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
2658 case INTEL_ENC_KERNEL_QUALITY : {
2659 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* offset from the I-frame kernel to the P/B variants */
2667 if (generic_state->frame_type == SLICE_TYPE_P) {
2669 } else if (generic_state->frame_type == SLICE_TYPE_B) {
2673 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
2674 gpe->context_init(ctx, gpe_context);
2676 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
2678 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
2679 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
2680 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
2681 curbe_mbenc_param.brc_enabled = brc_enabled;
2682 curbe_mbenc_param.roi_enabled = roi_enable;
2684 /* set curbe mbenc*/
2685 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
2687 // gen95 set curbe out of the brc. gen9 do it here
2688 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
2689 /*begin brc frame update*/
2690 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
2691 curbe_brc_param.gpe_context_mbenc = gpe_context;
/* switch gpe_context over to the BRC frame-update kernel */
2692 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
2693 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
2694 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2695 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
2697 gpe->context_init(ctx, gpe_context);
2698 gpe->reset_binding_table(ctx, gpe_context);
2699 /*brc copy ignored*/
2701 /* set curbe frame update*/
2702 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2704 /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
2705 if (avc_state->multi_pre_enable) {
2706 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
2708 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
2710 /* image state construct*/
2711 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
2712 /* set surface frame mbenc*/
2713 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2716 gpe->setup_interface_data(ctx, gpe_context);
/* single MEDIA_OBJECT with zeroed inline data */
2718 memset(&media_object_param, 0, sizeof(media_object_param));
2719 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2720 media_object_param.pinline_data = &media_object_inline_data;
2721 media_object_param.inline_size = sizeof(media_object_inline_data);
2723 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2726 &media_object_param);
2728 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the MB-level BRC update kernel: the structure is
 * zeroed and only the current frame type and the ROI-enable flag are
 * set. NOTE(review): fragment - the NULL-check after map_curbe and the
 * else keyword of the original are not visible here.
 */
2732 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2733 struct encode_state *encode_state,
2734 struct i965_gpe_context *gpe_context,
2735 struct intel_encoder_context *encoder_context,
2738 gen9_avc_mb_brc_curbe_data *cmd;
2739 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2740 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2742 cmd = i965_gpe_context_map_curbe(gpe_context);
2747 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2749 cmd->dw0.cur_frame_type = generic_state->frame_type;
2750 if (generic_state->brc_roi_enable) {
2751 cmd->dw0.enable_roi = 1;
2753 cmd->dw0.enable_roi = 0;
2756 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces for the MB-level BRC update kernel: the BRC history
 * buffer, and - conditionally - the per-MB QP surface and the ROI
 * surface, plus the MB status buffer.
 */
2762 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2763 struct encode_state *encode_state,
2764 struct i965_gpe_context *gpe_context,
2765 struct intel_encoder_context *encoder_context,
2768 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2769 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2770 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2772 /* brc history buffer*/
2773 gen9_add_buffer_gpe_surface(ctx,
2775 &avc_ctx->res_brc_history_buffer,
2777 avc_ctx->res_brc_history_buffer.size,
2779 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2781 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
/* only bound when MB-level BRC is on; the kernel writes per-MB QPs */
2782 if (generic_state->mb_brc_enabled) {
2783 gen9_add_buffer_2d_gpe_surface(ctx,
2785 &avc_ctx->res_mbbrc_mb_qp_data_surface,
2787 I965_SURFACEFORMAT_R8_UNORM,
2788 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2792 /* BRC roi feature*/
2793 if (generic_state->brc_roi_enable) {
2794 gen9_add_buffer_gpe_surface(ctx,
2796 &avc_ctx->res_mbbrc_roi_surface,
2798 avc_ctx->res_mbbrc_roi_surface.size,
2800 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2804 /* MB statistical data surface*/
2805 gen9_add_buffer_gpe_surface(ctx,
2807 &avc_ctx->res_mb_status_buffer,
2809 avc_ctx->res_mb_status_buffer.size,
2811 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/*
 * Dispatch the MB-level BRC update kernel with a media-object walker:
 * one thread per 2x2 macroblock group (the walker resolution is the
 * frame size in MBs rounded up and halved). Returns VA_STATUS_SUCCESS.
 * NOTE(review): fragment - the kernel_idx declaration line of the
 * original is not visible here.
 */
2817 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2818 struct encode_state *encode_state,
2819 struct intel_encoder_context *encoder_context)
2822 struct i965_driver_data *i965 = i965_driver_data(ctx);
2823 struct i965_gpe_table *gpe = &i965->gpe_table;
2824 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2825 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2826 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2827 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2829 struct i965_gpe_context *gpe_context;
2830 struct gpe_media_object_walker_parameter media_object_walker_param;
2831 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2832 int media_function = 0;
2835 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2836 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2837 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2839 gpe->context_init(ctx, gpe_context);
2840 gpe->reset_binding_table(ctx, gpe_context);
2842 /* set curbe brc mb update*/
2843 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2846 /* set surface brc mb update*/
2847 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2850 gpe->setup_interface_data(ctx, gpe_context);
2852 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2853 /* the scaling is based on 8x8 blk level */
/* +1 before /2 rounds odd MB counts up so the walker covers the edge */
2854 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2855 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
2856 kernel_walker_param.no_dependency = 1;
2858 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2860 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2863 &media_object_walker_param);
2865 return VA_STATUS_SUCCESS;
2869 mbenc kernel related functions; they include the intra dist kernel
/*
 * Derive the bi-prediction weight from the list-0 distance scale factor
 * for implicit weighted prediction. Only weights the VME hardware
 * supports (16/21/32/43/48) are kept; anything else falls back to the
 * default of 32. NOTE(review): fragment - the non-implicit branch body
 * and the return statement of the original are not visible here.
 */
2872 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2874 int biweight = 32; // default value
2876 /* based on kernel HLD*/
2877 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
2880 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2882 if (biweight != 16 && biweight != 21 &&
2883 biweight != 32 && biweight != 43 && biweight != 48) {
2884 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
2892 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2893 struct encode_state *encode_state,
2894 struct intel_encoder_context *encoder_context)
2896 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2897 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2898 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2899 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2901 int max_num_references;
2902 VAPictureH264 *curr_pic;
2903 VAPictureH264 *ref_pic_l0;
2904 VAPictureH264 *ref_pic_l1;
2913 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2915 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2916 curr_pic = &pic_param->CurrPic;
2917 for (i = 0; i < max_num_references; i++) {
2918 ref_pic_l0 = &(slice_param->RefPicList0[i]);
2920 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2921 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2923 ref_pic_l1 = &(slice_param->RefPicList1[0]);
2924 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2925 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2928 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2929 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2930 CLIP(poc0, -128, 127);
2931 CLIP(poc1, -128, 127);
2938 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2939 tx = (16384 + tmp) / td ;
2940 tmp = (tb * tx + 32) >> 6;
2941 CLIP(tmp, -1024, 1023);
2942 avc_state->dist_scale_factor_list0[i] = tmp;
/*
 * Look up the QP a reference picture was encoded with, via the
 * gen9_surface_avc private data attached to the reference's
 * object_surface. NOTE(review): fragment - the list/ref-index
 * parameters and the fallback return path of the original are not
 * visible here.
 */
2948 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2949 VAEncSliceParameterBufferH264 *slice_param,
2953 struct i965_driver_data *i965 = i965_driver_data(ctx);
2954 struct object_surface *obj_surface;
2955 struct gen9_surface_avc *avc_priv_surface;
2956 VASurfaceID surface_id;
2958 assert(slice_param);
/* pick the surface id from RefPicList0 or RefPicList1, bounds-checked
 * against the active reference count */
2962 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2963 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2967 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2968 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
/* resolve the VA surface and read the cached per-frame QP */
2972 obj_surface = SURFACE(surface_id);
2973 if (obj_surface && obj_surface->private_data) {
2974 avc_priv_surface = obj_surface->private_data;
2975 return avc_priv_surface->qp_value;
/*
 * Populate the MB-BRC constant data buffer: copy the per-slice-type
 * base table (16 DWs per QP, 52 QP levels) and then patch per-QP
 * entries - mode/mv costs, skip thresholds, intra scaling factors,
 * FTQ thresholds and trellis lambda values - according to the current
 * encoder state. NOTE(review): fragment - case labels, data-pointer
 * advance statements and closing braces of the original are not
 * visible here, so the exact loop structure cannot be confirmed.
 */
2982 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
2983 struct encode_state *encode_state,
2984 struct intel_encoder_context *encoder_context)
2986 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2987 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2988 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2989 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2991 struct i965_gpe_resource *gpe_resource = NULL;
2992 unsigned int * data = NULL;
2993 unsigned int * data_tmp = NULL;
/* 16 DWORDs of constants per QP x 52 QP values */
2994 unsigned int size = 16 * 52;
2995 unsigned int table_idx = 0;
2996 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2997 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
3000 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3001 assert(gpe_resource);
3002 data = i965_map_gpe_resource(gpe_resource);
/* select the base constant table by slice type (I/P/B) */
3005 table_idx = slice_type_kernel[generic_state->frame_type];
3007 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3011 switch (generic_state->frame_type) {
/* I-slice: optionally override the intra mode cost per QP */
3013 for (i = 0; i < AVC_QP_MAX ; i++) {
3014 if (avc_state->old_mode_cost_enable)
3015 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* P/B-slice: patch skip-bias MV cost, skip thresholds and intra
 * scaling factors per QP */
3021 for (i = 0; i < AVC_QP_MAX ; i++) {
3022 if (generic_state->frame_type == SLICE_TYPE_P) {
3023 if (avc_state->skip_bias_adjustment_enable)
3024 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
3026 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3027 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3028 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3029 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3031 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
3034 if (avc_state->adaptive_intra_scaling_enable) {
3035 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3037 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* common pass: FTQ skip thresholds (replicated into byte lanes of
 * DW6/DW7) and trellis lambda values per QP */
3049 for (i = 0; i < AVC_QP_MAX ; i++) {
3050 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3051 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3052 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3053 (avc_state->ftq_skip_threshold_lut[i] << 24));
3054 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3055 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3056 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3057 (avc_state->ftq_skip_threshold_lut[i] << 24));
3060 if (avc_state->kernel_trellis_enable) {
3061 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3062 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3068 i965_unmap_gpe_resource(gpe_resource);
/*
 * Program the CURBE (constant URB entry) for the AVC MBENC kernel.
 *
 * Two CURBE layouts are supported: the gen9 layout (SKL/BXT) and the
 * gen9.5 layout (KBL/GLK).  The visible declarations at dw80/dw81 imply
 * `cmd` is a union of the two pointers (declaration partially elided),
 * so cmd.g9 and cmd.g95 alias the same mapped CURBE — common DWords are
 * written through cmd.g9 and gen9.5-only fields through cmd.g95.
 *
 * The CURBE is seeded from a per-frame-type init table, then patched
 * from the slice/picture parameters, preset tables, BRC/ROI state and
 * reference-list QPs, and finally the binding-table surface indices are
 * written.  The mapping is released at the end via
 * i965_gpe_context_unmap_curbe().
 */
3072 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3073 struct encode_state *encode_state,
3074 struct i965_gpe_context *gpe_context,
3075 struct intel_encoder_context *encoder_context,
3078 struct i965_driver_data *i965 = i965_driver_data(ctx);
/* Presumably members of a local union `cmd` — declaration elided. */
3080 gen9_avc_mbenc_curbe_data *g9;
3081 gen95_avc_mbenc_curbe_data *g95;
3083 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3084 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3085 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3087 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3088 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3089 VASurfaceID surface_id;
3090 struct object_surface *obj_surface;
/* `param` is the opaque last argument (elided from the visible
 * signature); it carries the MBENC invocation flags. */
3092 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3093 unsigned char qp = 0;
3094 unsigned char me_method = 0;
3095 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3096 unsigned int table_idx = 0;
/* is_g9/is_g95 are presumably set to 1 in the platform branches below
 * (assignments elided); they later gate gen9-only dw59 writes. */
3097 unsigned char is_g9 = 0;
3098 unsigned char is_g95 = 0;
3099 unsigned int curbe_size = 0;
3101 unsigned int preset = generic_state->preset;
/* --- Platform dispatch: map the CURBE and seed it from the init table
 * matching the frame type (or the I-frame-distortion variant). --- */
3102 if (IS_SKL(i965->intel.device_info) ||
3103 IS_BXT(i965->intel.device_info)) {
3104 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3108 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3109 memset(cmd.g9, 0, curbe_size);
3111 if (mbenc_i_frame_dist_in_use) {
3112 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3115 switch (generic_state->frame_type) {
3117 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3120 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3123 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3130 } else if (IS_KBL(i965->intel.device_info) ||
3131 IS_GLK(i965->intel.device_info)) {
3132 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3136 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
/* NOTE(review): this branch mapped cmd.g95 but clears through cmd.g9;
 * harmless only because the union members alias the same pointer —
 * cmd.g95 would be clearer.  Confirm `cmd` really is a union. */
3137 memset(cmd.g9, 0, curbe_size);
3139 if (mbenc_i_frame_dist_in_use) {
3140 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3143 switch (generic_state->frame_type) {
3145 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3148 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3151 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3159 /* Never get here, just silence a gcc warning */
/* --- Common parameters: ME method from preset tables, slice QP. --- */
3165 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3166 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* dw0/dw37 carry the same search flags for the two VME calls. */
3168 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3169 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3170 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3171 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3173 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3174 cmd.g9->dw38.max_len_sp = 0;
/* gen9.5-only: extended MV cost range (guard presumably elided). */
3177 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3179 cmd.g9->dw3.src_access = 0;
3180 cmd.g9->dw3.ref_access = 0;
/* --- FTQ (fast transform quantization) enable selection. --- */
3182 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3183 //disable ftq_override by now.
3184 if (avc_state->ftq_override) {
3185 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3188 // both gen9 and gen95 come here by now
3189 if (generic_state->frame_type == SLICE_TYPE_P) {
/* Bit 0 of the preset table gates FTQ for P; bit 1 for B. */
3190 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3193 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3197 cmd.g9->dw3.ftq_enable = 0;
3200 if (avc_state->disable_sub_mb_partion)
3201 cmd.g9->dw3.sub_mb_part_mask = 0x7;
/* --- Geometry: 4x-downscaled for the I-frame distortion pass,
 * full-resolution otherwise. --- */
3203 if (mbenc_i_frame_dist_in_use) {
3204 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3205 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3206 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3207 cmd.g9->dw6.batch_buffer_end = 0;
3208 cmd.g9->dw31.intra_compute_type = 1;
3211 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3212 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3213 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
/* Mode/MV cost table for this slice type and QP into dw8..dw15. */
3216 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3217 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3218 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3219 } else if (avc_state->skip_bias_adjustment_enable) {
3220 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3221 // No need to check for P picture as the flag is only enabled for P picture */
3222 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* Search path table into dw16..dw31 (index 1 = B-frame variant). */
3227 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3228 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3230 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3231 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3232 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3233 cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3234 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3235 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3236 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3237 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
/* 8x8 intra partitions are only allowed with transform_8x8 enabled. */
3240 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3241 cmd.g9->dw7.src_field_polarity = 0;//field related
3243 /*ftq_skip_threshold_lut set,dw14 /15*/
3245 /*r5 disable NonFTQSkipThresholdLUT*/
3246 if (generic_state->frame_type == SLICE_TYPE_P) {
3247 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3249 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3250 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3254 cmd.g9->dw13.qp_prime_y = qp;
3255 cmd.g9->dw13.qp_prime_cb = qp;
3256 cmd.g9->dw13.qp_prime_cr = qp;
3257 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
/* --- Multi-reference prediction policy from the preset table.
 * 1 enables list checking, 128 disables it (case labels elided). --- */
3259 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3260 switch (gen9_avc_multi_pred[preset]) {
3262 cmd.g9->dw32.mult_pred_l0_disable = 128;
3263 cmd.g9->dw32.mult_pred_l1_disable = 128;
3266 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3267 cmd.g9->dw32.mult_pred_l1_disable = 128;
3270 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3271 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3274 cmd.g9->dw32.mult_pred_l0_disable = 1;
3275 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3281 cmd.g9->dw32.mult_pred_l0_disable = 128;
3282 cmd.g9->dw32.mult_pred_l1_disable = 128;
3285 /*field setting for dw33 34, ignored*/
/* --- Adaptive transform decision: thresholds live at dw60 on gen9.5
 * and dw58 on gen9 (platform guards presumably elided). --- */
3287 if (avc_state->adaptive_transform_decision_enable) {
3288 if (generic_state->frame_type != SLICE_TYPE_I) {
3289 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3291 cmd.g95->dw60.mb_texture_threshold = 1024;
3292 cmd.g95->dw60.tx_decision_threshold = 128;
3298 cmd.g9->dw58.mb_texture_threshold = 1024;
3299 cmd.g9->dw58.tx_decision_threshold = 128;
3304 if (generic_state->frame_type == SLICE_TYPE_B) {
3305 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
/* NOTE(review): duplicate assignment to list1_ref_id0_frm_field_parity;
 * the second write was presumably meant for
 * list1_ref_id1_frm_field_parity — confirm against the CURBE layout. */
3306 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3307 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3310 cmd.g9->dw34.b_original_bff = 0; //frame only
3311 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3312 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3313 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3314 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3315 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
/* gen9.5-only dw34 bits (platform guard presumably elided). */
3317 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3318 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3322 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3324 if (cmd.g9->dw34.force_non_skip_check) {
3325 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3330 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3331 cmd.g9->dw38.ref_threshold = 400;
3332 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3334 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3335 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3336 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3337 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
/* I-frame distortion pass: neutralize QP and intra penalties. */
3339 if (mbenc_i_frame_dist_in_use) {
3340 cmd.g9->dw13.qp_prime_y = 0;
3341 cmd.g9->dw13.qp_prime_cb = 0;
3342 cmd.g9->dw13.qp_prime_cr = 0;
3343 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3344 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3345 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
/* Per-reference actual QP values (dw44..dw46) for multi-ref costing. */
3348 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3349 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3350 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3351 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3352 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3353 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3354 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3355 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3356 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3357 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3358 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3361 table_idx = slice_type_kernel[generic_state->frame_type];
3362 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* --- Slice-type-specific search and bias parameters. --- */
3364 if (generic_state->frame_type == SLICE_TYPE_I) {
3365 cmd.g9->dw0.skip_mode_enable = 0;
3366 cmd.g9->dw37.skip_mode_enable = 0;
3367 cmd.g9->dw36.hme_combine_overlap = 0;
3368 cmd.g9->dw47.intra_cost_sf = 16;
3369 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3371 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3373 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3374 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3375 cmd.g9->dw3.bme_disable_fbr = 1;
3376 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3377 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3378 cmd.g9->dw7.non_skip_zmv_added = 1;
3379 cmd.g9->dw7.non_skip_mode_added = 1;
3380 cmd.g9->dw7.skip_center_mask = 1;
3381 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3382 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3383 cmd.g9->dw36.hme_combine_overlap = 1;
3384 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3385 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3386 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3387 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3388 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
/* dw59 only exists in the gen9 layout, hence the is_g9 guard. */
3389 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3390 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* B slice (else branch; label elided). */
3393 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3394 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3395 cmd.g9->dw3.search_ctrl = 7;
3396 cmd.g9->dw3.skip_type = 1;
3397 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3398 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3399 cmd.g9->dw7.skip_center_mask = 0xff;
3400 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3401 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3402 cmd.g9->dw36.hme_combine_overlap = 1;
/* First L1 reference decides the short-term-ref flag for B direct. */
3403 surface_id = slice_param->RefPicList1[0].picture_id;
3404 obj_surface = SURFACE(surface_id);
3406 WARN_ONCE("Invalid backward reference frame\n");
3409 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3411 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3412 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3413 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3414 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
/* Distance scale factors for weighted B prediction, dw40..dw43. */
3415 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3416 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3417 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3418 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3419 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3420 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3421 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3422 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3424 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3425 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3426 cmd.g9->dw7.non_skip_zmv_added = 1;
3427 cmd.g9->dw7.non_skip_mode_added = 1;
3430 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3431 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3432 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* Kernel init data may have toggled block-based skip; write it back so
 * later stages (e.g. MB-BRC const data) agree with the CURBE. */
3436 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
/* --- Rolling intra refresh (currently forced-off defaults). --- */
3438 if (avc_state->rolling_intra_refresh_enable) {
3439 /*by now disable it*/
3440 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3441 cmd.g9->dw32.mult_pred_l0_disable = 128;
3442 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3443 across one P frame to another P frame, as needed by the RollingI algo */
3445 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3446 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3447 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
/* gen9.5 path: square rolling-I with BRC is not supported, disable. */
3451 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3452 cmd.g95->dw4.enable_intra_refresh = 0;
3453 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3454 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3455 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3457 cmd.g95->dw4.enable_intra_refresh = 1;
3458 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3459 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3460 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3461 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3462 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3467 cmd.g9->dw34.widi_intra_refresh_en = 0;
3470 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3471 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3473 /*roi set disable by now. 49-56*/
/* --- Up to four ROI rectangles (dw49..dw56) plus per-ROI QP deltas
 * (dw57) when BRC is off; ROI+BRC instead clears the enable flag. --- */
3474 if (curbe_param->roi_enabled) {
3475 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3476 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3477 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3478 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3480 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3481 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3482 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3483 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3485 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3486 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3487 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3488 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3490 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3491 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3492 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3493 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3495 if (!generic_state->brc_enabled) {
/* Clamp each ROI QP delta so qp+delta stays in [0, AVC_QP_MAX]. */
3497 tmp = generic_state->roi[0].value;
3498 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3499 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3500 tmp = generic_state->roi[1].value;
3501 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3502 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3503 tmp = generic_state->roi[2].value;
3504 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3505 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3506 tmp = generic_state->roi[3].value;
3507 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3508 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3510 cmd.g9->dw34.roi_enable_flag = 0;
/* --- gen9.5 trellis quantization lambdas (dw58/dw59), clamped to the
 * hardware maximum with rounding applied (guard presumably elided). --- */
3515 if (avc_state->tq_enable) {
3516 if (generic_state->frame_type == SLICE_TYPE_I) {
3517 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3518 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3520 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3521 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3522 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3525 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3526 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3529 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3530 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3532 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3533 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3535 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3536 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3538 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3539 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
/* --- Binding-table surface indices: gen9.5 layout (dw66..dw87). --- */
3544 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3545 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3546 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3547 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3548 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3549 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3550 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3551 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3552 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3553 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3554 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3555 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3556 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3557 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3558 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3559 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3560 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3561 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3562 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3563 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3564 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3565 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
/* --- Binding-table surface indices: gen9 layout (dw64..dw85). --- */
3569 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3570 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3571 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3572 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3573 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3574 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3575 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3576 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3577 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3578 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3579 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3580 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3581 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3582 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3583 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3584 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3585 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3586 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3587 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3588 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3589 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3590 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
/* Release the CURBE mapping; the GPU consumes it at kernel dispatch. */
3593 i965_gpe_context_unmap_curbe(gpe_context);
3599 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
3600 struct encode_state *encode_state,
3601 struct i965_gpe_context *gpe_context,
3602 struct intel_encoder_context *encoder_context,
3605 struct i965_driver_data *i965 = i965_driver_data(ctx);
3606 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3607 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3608 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3609 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3610 struct object_surface *obj_surface;
3611 struct gen9_surface_avc *avc_priv_surface;
3612 struct i965_gpe_resource *gpe_resource;
3613 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
3614 VASurfaceID surface_id;
3615 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
3616 unsigned int size = 0;
3617 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
3618 generic_state->frame_height_in_mbs;
3620 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3621 unsigned char is_g95 = 0;
3623 if (IS_SKL(i965->intel.device_info) ||
3624 IS_BXT(i965->intel.device_info))
3626 else if (IS_KBL(i965->intel.device_info) ||
3627 IS_GLK(i965->intel.device_info))
3630 obj_surface = encode_state->reconstructed_object;
3632 if (!obj_surface || !obj_surface->private_data)
3634 avc_priv_surface = obj_surface->private_data;
3636 /*pak obj command buffer output*/
3637 size = frame_mb_size * 16 * 4;
3638 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3639 gen9_add_buffer_gpe_surface(ctx,
3645 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
3647 /*mv data buffer output*/
3648 size = frame_mb_size * 32 * 4;
3649 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3650 gen9_add_buffer_gpe_surface(ctx,
3656 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
3658 /*input current YUV surface, current input Y/UV object*/
3659 if (mbenc_i_frame_dist_in_use) {
3660 obj_surface = encode_state->reconstructed_object;
3661 if (!obj_surface || !obj_surface->private_data)
3663 avc_priv_surface = obj_surface->private_data;
3664 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3666 obj_surface = encode_state->input_yuv_object;
3668 gen9_add_2d_gpe_surface(ctx,
3673 I965_SURFACEFORMAT_R8_UNORM,
3674 GEN9_AVC_MBENC_CURR_Y_INDEX);
3676 gen9_add_2d_gpe_surface(ctx,
3681 I965_SURFACEFORMAT_R16_UINT,
3682 GEN9_AVC_MBENC_CURR_UV_INDEX);
3684 if (generic_state->hme_enabled) {
3686 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
3687 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3690 I965_SURFACEFORMAT_R8_UNORM,
3691 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
3692 /* memv distortion input*/
3693 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
3694 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3697 I965_SURFACEFORMAT_R8_UNORM,
3698 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
3701 /*mbbrc const data_buffer*/
3702 if (param->mb_const_data_buffer_in_use) {
3703 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
3704 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
3705 gen9_add_buffer_gpe_surface(ctx,
3711 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
3715 /*mb qp data_buffer*/
3716 if (param->mb_qp_buffer_in_use) {
3717 if (avc_state->mb_qp_data_enable)
3718 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
3720 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
3721 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3724 I965_SURFACEFORMAT_R8_UNORM,
3725 GEN9_AVC_MBENC_MBQP_INDEX);
3728 /*input current YUV surface, current input Y/UV object*/
3729 if (mbenc_i_frame_dist_in_use) {
3730 obj_surface = encode_state->reconstructed_object;
3731 if (!obj_surface || !obj_surface->private_data)
3733 avc_priv_surface = obj_surface->private_data;
3734 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3736 obj_surface = encode_state->input_yuv_object;
3738 gen9_add_adv_gpe_surface(ctx, gpe_context,
3740 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
3741 /*input ref YUV surface*/
3742 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
3743 surface_id = slice_param->RefPicList0[i].picture_id;
3744 obj_surface = SURFACE(surface_id);
3745 if (!obj_surface || !obj_surface->private_data)
3748 gen9_add_adv_gpe_surface(ctx, gpe_context,
3750 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
3752 /*input current YUV surface, current input Y/UV object*/
3753 if (mbenc_i_frame_dist_in_use) {
3754 obj_surface = encode_state->reconstructed_object;
3755 if (!obj_surface || !obj_surface->private_data)
3757 avc_priv_surface = obj_surface->private_data;
3758 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3760 obj_surface = encode_state->input_yuv_object;
3762 gen9_add_adv_gpe_surface(ctx, gpe_context,
3764 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
3766 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
3767 if (i > 0) break; // only one ref supported here for B frame
3768 surface_id = slice_param->RefPicList1[i].picture_id;
3769 obj_surface = SURFACE(surface_id);
3770 if (!obj_surface || !obj_surface->private_data)
3773 gen9_add_adv_gpe_surface(ctx, gpe_context,
3775 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
3776 gen9_add_adv_gpe_surface(ctx, gpe_context,
3778 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
3780 avc_priv_surface = obj_surface->private_data;
3781 /*pak obj command buffer output(mb code)*/
3782 size = frame_mb_size * 16 * 4;
3783 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3784 gen9_add_buffer_gpe_surface(ctx,
3790 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
3792 /*mv data buffer output*/
3793 size = frame_mb_size * 32 * 4;
3794 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3795 gen9_add_buffer_gpe_surface(ctx,
3801 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
3805 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
3806 gen9_add_adv_gpe_surface(ctx, gpe_context,
3808 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
3813 /* BRC distortion data buffer for I frame*/
3814 if (mbenc_i_frame_dist_in_use) {
3815 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
3816 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3819 I965_SURFACEFORMAT_R8_UNORM,
3820 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
3823 /* as ref frame ,update later RefPicSelect of Current Picture*/
3824 obj_surface = encode_state->reconstructed_object;
3825 avc_priv_surface = obj_surface->private_data;
3826 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
3827 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
3828 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3831 I965_SURFACEFORMAT_R8_UNORM,
3832 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
3836 if (param->mb_vproc_stats_enable) {
3837 /*mb status buffer input*/
3838 size = frame_mb_size * 16 * 4;
3839 gpe_resource = &(avc_ctx->res_mb_status_buffer);
3840 gen9_add_buffer_gpe_surface(ctx,
3846 GEN9_AVC_MBENC_MB_STATS_INDEX);
3848 } else if (avc_state->flatness_check_enable) {
3850 gpe_resource = &(avc_ctx->res_flatness_check_surface);
3851 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3854 I965_SURFACEFORMAT_R8_UNORM,
3855 GEN9_AVC_MBENC_MB_STATS_INDEX);
3858 if (param->mad_enable) {
3859 /*mad buffer input*/
3861 gpe_resource = &(avc_ctx->res_mad_data_buffer);
3862 gen9_add_buffer_gpe_surface(ctx,
3868 GEN9_AVC_MBENC_MAD_DATA_INDEX);
3869 i965_zero_gpe_resource(gpe_resource);
3872 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
3873 if (avc_state->mbenc_brc_buffer_size > 0) {
3874 size = avc_state->mbenc_brc_buffer_size;
3875 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
3876 gen9_add_buffer_gpe_surface(ctx,
3882 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
3885 /*artitratry num mbs in slice*/
3886 if (avc_state->arbitrary_num_mbs_in_slice) {
3887 /*slice surface input*/
3888 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
3889 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3892 I965_SURFACEFORMAT_R8_UNORM,
3893 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
3894 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
3897 /* BRC distortion data buffer for I frame */
3898 if (!mbenc_i_frame_dist_in_use) {
3899 if (avc_state->mb_disable_skip_map_enable) {
3900 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
3901 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3904 I965_SURFACEFORMAT_R8_UNORM,
3905 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
3908 if (avc_state->sfd_enable && generic_state->hme_enabled) {
3909 if (generic_state->frame_type == SLICE_TYPE_P) {
3910 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
3912 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3913 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
3916 if (generic_state->frame_type != SLICE_TYPE_I) {
3917 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3920 I965_SURFACEFORMAT_R8_UNORM,
3921 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
/* Launch the MBENC (per-macroblock encoding) GPE kernel via a media-object
 * walker, or the BRC I-frame-distortion variant when i_frame_dist_in_use
 * is set.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): this excerpt has gaps in its embedded line numbering
 * (3934, 3942, 3948, ... missing), so the return type, opening brace,
 * several declarations (e.g. kernel_idx, mad_enable) and closing braces
 * are not visible here; comments below describe only the visible code.
 * NOTE(review): "¶m" below is a mojibake of "&param" — kept verbatim
 * because this is a documentation-only edit; fix the encoding upstream. */
3930 gen9_avc_kernel_mbenc(VADriverContextP ctx,
3931 struct encode_state *encode_state,
3932 struct intel_encoder_context *encoder_context,
3933 bool i_frame_dist_in_use)
3935 struct i965_driver_data *i965 = i965_driver_data(ctx);
3936 struct i965_gpe_table *gpe = &i965->gpe_table;
3937 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3938 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3939 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3940 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3941 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3943 struct i965_gpe_context *gpe_context;
3944 struct gpe_media_object_walker_parameter media_object_walker_param;
3945 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3946 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
3947 int media_function = 0;
3949 unsigned int mb_const_data_buffer_in_use = 0;
3950 unsigned int mb_qp_buffer_in_use = 0;
3951 unsigned int brc_enabled = 0;
3952 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* dirty-ROI is compiled out: the trailing "&& (0)" forces this to 0 */
3953 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3954 struct mbenc_param param ;
3956 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
3958 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Decide which optional surfaces the kernel consumes (visible terms only;
 * lines 3962-3963 of the condition are missing from this excerpt). */
3960 mb_const_data_buffer_in_use =
3961 generic_state->mb_brc_enabled ||
3964 avc_state->mb_qp_data_enable ||
3965 avc_state->rolling_intra_refresh_enable;
3966 mb_qp_buffer_in_use =
3967 generic_state->mb_brc_enabled ||
3968 generic_state->brc_roi_enable ||
3969 avc_state->mb_qp_data_enable;
/* I-frame-distortion mode runs on the 4x-downscaled picture with the BRC
 * kernel context; otherwise select the MBENC kernel by quality preset. */
3971 if (mbenc_i_frame_dist_in_use) {
3972 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
3973 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
3974 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
3975 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
3979 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3981 switch (generic_state->kernel_mode) {
3982 case INTEL_ENC_KERNEL_NORMAL : {
3983 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
3984 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3987 case INTEL_ENC_KERNEL_PERFORMANCE : {
3988 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
3989 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3992 case INTEL_ENC_KERNEL_QUALITY : {
3993 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
3994 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* P/B frames offset kernel_idx further — the offset lines (4003, 4005)
 * are missing from this excerpt. */
4002 if (generic_state->frame_type == SLICE_TYPE_P) {
4004 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4008 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
4009 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
4010 mad_enable = avc_state->mad_enable;
4011 brc_enabled = generic_state->brc_enabled;
4013 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
/* Fill the parameter bundle passed to the curbe/surface setup hooks. */
4016 memset(¶m, 0, sizeof(struct mbenc_param));
4018 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
4019 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
4020 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
4021 param.mad_enable = mad_enable;
4022 param.brc_enabled = brc_enabled;
4023 param.roi_enabled = roi_enable;
4025 if (avc_state->mb_status_supported) {
4026 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
/* If the BRC-update kernel already wrote the MBENC curbe, skip both the
 * context re-init and the curbe programming below. */
4029 if (!avc_state->mbenc_curbe_set_in_brc_update) {
4030 gpe->context_init(ctx, gpe_context);
4033 gpe->reset_binding_table(ctx, gpe_context);
4035 if (!avc_state->mbenc_curbe_set_in_brc_update) {
4037 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, ¶m);
4040 /* MB brc const data buffer set up*/
4041 if (mb_const_data_buffer_in_use) {
4042 // calculate the lambda table, it is kernel controlled trellis quantization, gen95+
4043 if (avc_state->lambda_table_enable)
4044 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
4046 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
4049 /*clear the mad buffer*/
4051 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
4054 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4056 gpe->setup_interface_data(ctx, gpe_context);
/* Walker setup: I-frame distortion has no MB dependency; otherwise the
 * walker degree follows frame type (45 deg for I, 26 deg for P, and for B
 * 26 deg unless spatial direct prediction is off). */
4059 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4061 kernel_walker_param.use_scoreboard = 1;
4062 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4063 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4064 if (mbenc_i_frame_dist_in_use) {
4065 kernel_walker_param.no_dependency = 1;
4067 switch (generic_state->frame_type) {
4069 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4072 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4075 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4076 if (!slice_param->direct_spatial_mv_pred_flag) {
4077 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4083 kernel_walker_param.no_dependency = 0;
4086 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4088 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4091 &media_object_walker_param);
4092 return VA_STATUS_SUCCESS;
4096 ME (motion estimation) kernel related functions
/* Program the CURBE (constant data) for the HME motion-estimation kernel.
 * curbe_param->hme_type selects the 4x / 16x / 32x hierarchical level;
 * each level configures whether MVs from the coarser level are consumed,
 * whether distortions are written, and the MV shift factors.
 * NOTE(review): embedded line numbers jump (4103-4104, 4128, 4146-4151,
 * 4153-4156 ... missing) — the return type, the trailing "void * param"
 * parameter, scale_factor assignments per case, break/closing braces and
 * the NULL-check after map_curbe are not visible in this excerpt. */
4099 gen9_avc_set_curbe_me(VADriverContextP ctx,
4100 struct encode_state *encode_state,
4101 struct i965_gpe_context *gpe_context,
4102 struct intel_encoder_context *encoder_context,
4105 gen9_avc_me_curbe_data *curbe_cmd;
4106 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4107 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4108 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4110 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4112 struct me_param * curbe_param = (struct me_param *)param ;
4113 unsigned char use_mv_from_prev_step = 0;
4114 unsigned char write_distortions = 0;
4115 unsigned char qp_prime_y = 0;
/* Default to the P-frame method table; overridden below for B frames. */
4116 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4117 unsigned char seach_table_idx = 0;
4118 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4119 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4120 unsigned int scale_factor = 0;
4122 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Per-HME-level configuration.  scale_factor assignments (likely 4/16/32)
 * are on lines missing from this excerpt — TODO confirm against full file. */
4123 switch (curbe_param->hme_type) {
4124 case INTEL_ENC_HME_4x : {
/* 4x consumes 16x MVs only when the 16x pass is enabled. */
4125 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4126 write_distortions = 1;
4127 mv_shift_factor = 2;
4129 prev_mv_read_pos_factor = 0;
4132 case INTEL_ENC_HME_16x : {
4133 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4134 write_distortions = 0;
4135 mv_shift_factor = 2;
4137 prev_mv_read_pos_factor = 1;
4140 case INTEL_ENC_HME_32x : {
/* 32x is the coarsest level: no previous step to read from. */
4141 use_mv_from_prev_step = 0;
4142 write_distortions = 0;
4143 mv_shift_factor = 1;
4145 prev_mv_read_pos_factor = 0;
4152 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4157 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4158 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
/* Start from the canned init data, then patch the per-frame fields. */
4160 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4162 curbe_cmd->dw3.sub_pel_mode = 3;
4163 if (avc_state->field_scaling_output_interleaved) {
4164 /*frame set to zero,field specified*/
4165 curbe_cmd->dw3.src_access = 0;
4166 curbe_cmd->dw3.ref_access = 0;
4167 curbe_cmd->dw7.src_field_polarity = 0;
4169 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4170 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4171 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4173 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4174 curbe_cmd->dw6.write_distortions = write_distortions;
4175 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4176 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames: bidirectional weight, L1 ref count, and the B-method table. */
4178 if (generic_state->frame_type == SLICE_TYPE_B) {
4179 curbe_cmd->dw1.bi_weight = 32;
4180 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4181 me_method = gen9_avc_b_me_method[generic_state->preset];
4182 seach_table_idx = 1;
4185 if (generic_state->frame_type == SLICE_TYPE_P ||
4186 generic_state->frame_type == SLICE_TYPE_B)
4187 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4189 curbe_cmd->dw13.ref_streamin_cost = 5;
4190 curbe_cmd->dw13.roi_enable = 0;
4192 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4193 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Copy the 14-dword search-path table for the chosen list/method. */
4195 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table indices the kernel uses to address its surfaces. */
4197 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4198 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4199 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4200 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4201 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4202 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4203 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4205 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the input/output surfaces for the HME kernel at the selected
 * hierarchy level (4x / 16x / 32x).  Each case binds: the MV-data output
 * buffer, optionally the next-coarser-level MV input, the current
 * downscaled picture, and the L0 (and, for B frames, L1) downscaled
 * reference pictures as VME "adv" surfaces at interleaved indices.
 * NOTE(review): embedded line numbers jump throughout (e.g. 4214-4215,
 * 4235, 4277, 4284-4293 missing) — the "void * param" parameter, early
 * returns after NULL checks, break statements, the B-frame guards around
 * the L1 loops and several call arguments are not visible here. */
4210 gen9_avc_send_surface_me(VADriverContextP ctx,
4211 struct encode_state *encode_state,
4212 struct i965_gpe_context *gpe_context,
4213 struct intel_encoder_context *encoder_context,
4216 struct i965_driver_data *i965 = i965_driver_data(ctx);
4218 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4219 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4220 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4221 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4223 struct object_surface *obj_surface, *input_surface;
4224 struct gen9_surface_avc *avc_priv_surface;
4225 struct i965_gpe_resource *gpe_resource;
4226 struct me_param * curbe_param = (struct me_param *)param ;
4228 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4229 VASurfaceID surface_id;
4232 /* all scaled input surface stored in reconstructed_object*/
4233 obj_surface = encode_state->reconstructed_object;
4234 if (!obj_surface || !obj_surface->private_data)
4236 avc_priv_surface = obj_surface->private_data;
4239 switch (curbe_param->hme_type) {
4240 case INTEL_ENC_HME_4x : {
/* 4x MV data output */
4242 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4243 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4246 I965_SURFACEFORMAT_R8_UNORM,
4247 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 16x MV data as input when the 16x pass feeds this one */
4250 if (generic_state->b16xme_enabled) {
4251 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4252 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4255 I965_SURFACEFORMAT_R8_UNORM,
4256 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
4258 /* brc distortion output*/
4259 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
4260 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4263 I965_SURFACEFORMAT_R8_UNORM,
4264 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
4265 /* memv distortion output*/
4266 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4267 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4270 I965_SURFACEFORMAT_R8_UNORM,
4271 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
4272 /*input current down scaled YUV surface*/
4273 obj_surface = encode_state->reconstructed_object;
4274 avc_priv_surface = obj_surface->private_data;
4275 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4276 gen9_add_adv_gpe_surface(ctx, gpe_context,
4278 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4279 /*input ref scaled YUV surface*/
4280 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4281 surface_id = slice_param->RefPicList0[i].picture_id;
4282 obj_surface = SURFACE(surface_id);
4283 if (!obj_surface || !obj_surface->private_data)
4285 avc_priv_surface = obj_surface->private_data;
4287 input_surface = avc_priv_surface->scaled_4x_surface_obj;
/* L0 refs occupy odd slots after the current-picture index. */
4289 gen9_add_adv_gpe_surface(ctx, gpe_context,
4291 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
/* Current picture again for the backward (L1) direction; the B-frame
 * guard around this section is on lines missing from this excerpt. */
4294 obj_surface = encode_state->reconstructed_object;
4295 avc_priv_surface = obj_surface->private_data;
4296 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4298 gen9_add_adv_gpe_surface(ctx, gpe_context,
4300 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4302 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4303 surface_id = slice_param->RefPicList1[i].picture_id;
4304 obj_surface = SURFACE(surface_id);
4305 if (!obj_surface || !obj_surface->private_data)
4307 avc_priv_surface = obj_surface->private_data;
4309 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4311 gen9_add_adv_gpe_surface(ctx, gpe_context,
4313 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/* Same pattern at 16x: MV output, optional 32x MV input, then current
 * and reference pictures from the 16x-scaled surfaces. */
4318 case INTEL_ENC_HME_16x : {
4319 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4320 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4323 I965_SURFACEFORMAT_R8_UNORM,
4324 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4326 if (generic_state->b32xme_enabled) {
4327 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4328 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4331 I965_SURFACEFORMAT_R8_UNORM,
4332 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
4335 obj_surface = encode_state->reconstructed_object;
4336 avc_priv_surface = obj_surface->private_data;
4337 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4338 gen9_add_adv_gpe_surface(ctx, gpe_context,
4340 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4342 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4343 surface_id = slice_param->RefPicList0[i].picture_id;
4344 obj_surface = SURFACE(surface_id);
4345 if (!obj_surface || !obj_surface->private_data)
4347 avc_priv_surface = obj_surface->private_data;
4349 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4351 gen9_add_adv_gpe_surface(ctx, gpe_context,
4353 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4356 obj_surface = encode_state->reconstructed_object;
4357 avc_priv_surface = obj_surface->private_data;
4358 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4360 gen9_add_adv_gpe_surface(ctx, gpe_context,
4362 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4364 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4365 surface_id = slice_param->RefPicList1[i].picture_id;
4366 obj_surface = SURFACE(surface_id);
4367 if (!obj_surface || !obj_surface->private_data)
4369 avc_priv_surface = obj_surface->private_data;
4371 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4373 gen9_add_adv_gpe_surface(ctx, gpe_context,
4375 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/* Coarsest level (32x): MV output only (no coarser input), plus current
 * and reference pictures from the 32x-scaled surfaces. */
4379 case INTEL_ENC_HME_32x : {
4380 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4381 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4384 I965_SURFACEFORMAT_R8_UNORM,
4385 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4387 obj_surface = encode_state->reconstructed_object;
4388 avc_priv_surface = obj_surface->private_data;
4389 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4390 gen9_add_adv_gpe_surface(ctx, gpe_context,
4392 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4394 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4395 surface_id = slice_param->RefPicList0[i].picture_id;
4396 obj_surface = SURFACE(surface_id);
4397 if (!obj_surface || !obj_surface->private_data)
4399 avc_priv_surface = obj_surface->private_data;
4401 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4403 gen9_add_adv_gpe_surface(ctx, gpe_context,
4405 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4408 obj_surface = encode_state->reconstructed_object;
4409 avc_priv_surface = obj_surface->private_data;
4410 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4412 gen9_add_adv_gpe_surface(ctx, gpe_context,
4414 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4416 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4417 surface_id = slice_param->RefPicList1[i].picture_id;
4418 obj_surface = SURFACE(surface_id);
4419 if (!obj_surface || !obj_surface->private_data)
4421 avc_priv_surface = obj_surface->private_data;
4423 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4425 gen9_add_adv_gpe_surface(ctx, gpe_context,
4427 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/* Launch the HME kernel for one hierarchy level: pick the media function
 * and scale factor from hme_type, select the P- or B-frame ME kernel,
 * program curbe and surfaces via the generic-context hooks, then walk the
 * downscaled picture with no inter-MB dependency.  Returns
 * VA_STATUS_SUCCESS.
 * NOTE(review): embedded line numbers jump (4441-4442, 4449, 4455,
 * 4458-4459 ... missing) — the hme_type parameter, kernel_idx declaration,
 * scale_factor assignments and break statements are not visible here. */
4438 gen9_avc_kernel_me(VADriverContextP ctx,
4439 struct encode_state *encode_state,
4440 struct intel_encoder_context *encoder_context,
4443 struct i965_driver_data *i965 = i965_driver_data(ctx);
4444 struct i965_gpe_table *gpe = &i965->gpe_table;
4445 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4446 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4447 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4448 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4450 struct i965_gpe_context *gpe_context;
4451 struct gpe_media_object_walker_parameter media_object_walker_param;
4452 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4453 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4454 int media_function = 0;
4456 struct me_param param ;
4457 unsigned int scale_factor = 0;
/* Map HME level to media function; scale_factor is set on lines missing
 * from this excerpt (presumably 4/16/32 — TODO confirm). */
4460 case INTEL_ENC_HME_4x : {
4461 media_function = INTEL_MEDIA_STATE_4X_ME;
4465 case INTEL_ENC_HME_16x : {
4466 media_function = INTEL_MEDIA_STATE_16X_ME;
4470 case INTEL_ENC_HME_32x : {
4471 media_function = INTEL_MEDIA_STATE_32X_ME;
4480 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4481 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4483 /* I frame should not come here.*/
4484 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4485 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4487 gpe->context_init(ctx, gpe_context);
4488 gpe->reset_binding_table(ctx, gpe_context);
4491 memset(¶m, 0, sizeof(param));
4492 param.hme_type = hme_type;
4493 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, ¶m);
4496 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4498 gpe->setup_interface_data(ctx, gpe_context);
4500 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4501 /* the scaling is based on 8x8 blk level */
4502 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4503 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4504 kernel_walker_param.no_dependency = 1;
4506 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4508 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4511 &media_object_walker_param);
4513 return VA_STATUS_SUCCESS;
/* Program the CURBE for the weighted-prediction (WP) kernel: default luma
 * weight/offset from list 1 or list 0 of the first slice, plus the input
 * and output binding-table indices.
 * NOTE(review): embedded line numbers jump (4524-4525, 4533-4535, 4540,
 * 4543-4544 missing) — the "void * param" parameter, the NULL check after
 * map_curbe and the else keyword are not visible in this excerpt. */
4520 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4521 struct encode_state *encode_state,
4522 struct i965_gpe_context *gpe_context,
4523 struct intel_encoder_context *encoder_context,
4526 gen9_avc_wp_curbe_data *cmd;
4527 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4528 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4529 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4530 struct wp_param * curbe_param = (struct wp_param *)param;
4532 cmd = i965_gpe_context_map_curbe(gpe_context);
4536 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
/* ref_list_idx selects list 1 (non-zero) vs list 0 weights/offsets. */
4537 if (curbe_param->ref_list_idx) {
4538 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4539 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4541 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4542 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4545 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4546 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4548 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the input reference picture and the per-list WP output surface for
 * the weighted-prediction kernel, and record whether the first L0/L1
 * reference is usable (weighted_ref_lX_enable).
 * NOTE(review): embedded line numbers jump (4557-4558, 4573, 4575, 4580,
 * 4582-4583, 4585-4586 ... missing) — the "void * param" parameter, else
 * branches and some call arguments are not visible in this excerpt;
 * note also line 4584 rebinds obj_surface to reference_objects[0]. */
4553 gen9_avc_send_surface_wp(VADriverContextP ctx,
4554 struct encode_state *encode_state,
4555 struct i965_gpe_context *gpe_context,
4556 struct intel_encoder_context *encoder_context,
4559 struct i965_driver_data *i965 = i965_driver_data(ctx);
4560 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4561 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4562 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4563 struct wp_param * curbe_param = (struct wp_param *)param;
4564 struct object_surface *obj_surface;
4565 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4566 VASurfaceID surface_id;
/* Validate the first reference of the selected list and latch the
 * corresponding weighted_ref enable flag. */
4568 if (curbe_param->ref_list_idx) {
4569 surface_id = slice_param->RefPicList1[0].picture_id;
4570 obj_surface = SURFACE(surface_id);
4571 if (!obj_surface || !obj_surface->private_data)
4572 avc_state->weighted_ref_l1_enable = 0;
4574 avc_state->weighted_ref_l1_enable = 1;
4576 surface_id = slice_param->RefPicList0[0].picture_id;
4577 obj_surface = SURFACE(surface_id);
4578 if (!obj_surface || !obj_surface->private_data)
4579 avc_state->weighted_ref_l0_enable = 0;
4581 avc_state->weighted_ref_l0_enable = 1;
4584 obj_surface = encode_state->reference_objects[0];
/* Input reference picture for the WP kernel. */
4587 gen9_add_adv_gpe_surface(ctx, gpe_context,
4589 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
/* Per-list output surface holding the weighted/scaled picture. */
4591 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4592 gen9_add_adv_gpe_surface(ctx, gpe_context,
4594 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
/* Launch the weighted-prediction kernel for list 0 or list 1
 * (list1_in_use == 1 selects list 1), walking the full-resolution
 * picture with no inter-MB dependency.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): embedded line numbers jump (4603, 4610, 4616, 4618 ...
 * missing) — the return type, opening brace and some blank/brace lines
 * are not visible in this excerpt. */
4599 gen9_avc_kernel_wp(VADriverContextP ctx,
4600 struct encode_state *encode_state,
4601 struct intel_encoder_context *encoder_context,
4602 unsigned int list1_in_use)
4604 struct i965_driver_data *i965 = i965_driver_data(ctx);
4605 struct i965_gpe_table *gpe = &i965->gpe_table;
4606 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4607 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4608 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4609 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4611 struct i965_gpe_context *gpe_context;
4612 struct gpe_media_object_walker_parameter media_object_walker_param;
4613 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4614 int media_function = INTEL_MEDIA_STATE_ENC_WP;
4615 struct wp_param param;
4617 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4619 gpe->context_init(ctx, gpe_context);
4620 gpe->reset_binding_table(ctx, gpe_context);
4622 memset(¶m, 0, sizeof(param));
4623 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4625 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m);
4628 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4630 gpe->setup_interface_data(ctx, gpe_context);
4632 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4633 /* the scaling is based on 8x8 blk level */
4634 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4635 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4636 kernel_walker_param.no_dependency = 1;
4638 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4640 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4643 &media_object_walker_param);
4645 return VA_STATUS_SUCCESS;
4650 SFD (static frame detection) related functions
/* Program the CURBE for the static-frame-detection (SFD) kernel:
 * thresholds for large/zero motion vectors, per-frame geometry, the
 * QP-indexed cost table for P or B frames, and surface indices.
 * NOTE(review): embedded line numbers jump (4657-4658, 4664, 4666-4668
 * ... missing) — the "void * param" parameter and the NULL check after
 * map_curbe are not visible in this excerpt. */
4653 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
4654 struct encode_state *encode_state,
4655 struct i965_gpe_context *gpe_context,
4656 struct intel_encoder_context *encoder_context,
4659 gen9_avc_sfd_curbe_data *cmd;
4660 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4661 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4662 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4663 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4665 cmd = i965_gpe_context_map_curbe(gpe_context);
4669 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
4671 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
4672 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
4673 cmd->dw0.stream_in_type = 7 ;
4674 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
4675 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
4676 cmd->dw0.vdenc_mode_disable = 1 ;
4678 cmd->dw1.hme_stream_in_ref_cost = 5 ;
4679 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
4680 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
4682 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
4683 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
/* Static-frame heuristics: totals are expressed as a percentage of the
 * frame's macroblock count. */
4685 cmd->dw3.large_mv_threshold = 128 ;
4686 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
4687 cmd->dw5.zmv_threshold = 4 ;
4688 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
4689 cmd->dw7.min_dist_threshold = 10 ;
/* QP-indexed cost table: P and B frames use different canned tables. */
4691 if (generic_state->frame_type == SLICE_TYPE_P) {
4692 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
4694 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4695 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
4698 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
4699 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
/* Binding-table indices the SFD kernel uses to address its surfaces. */
4700 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
4701 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
4702 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
4703 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
4704 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
4706 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the SFD kernel's surfaces: 4x HME MV data and distortion as
 * inputs, and the SFD result buffer as output.
 * NOTE(review): embedded line numbers jump (4715-4716, 4720-4721,
 * 4725-4726 ... missing) — the "void * param" parameter, size
 * computations and some gen9_add_buffer_gpe_surface arguments are not
 * visible in this excerpt. */
4711 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4712 struct encode_state *encode_state,
4713 struct i965_gpe_context *gpe_context,
4714 struct intel_encoder_context *encoder_context,
4717 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4718 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4719 struct i965_gpe_resource *gpe_resource;
4722 /*HME mv data surface memv output 4x*/
4723 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4724 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4727 I965_SURFACEFORMAT_R8_UNORM,
4728 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4730 /* memv distortion */
4731 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4732 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4735 I965_SURFACEFORMAT_R8_UNORM,
4736 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
/* SFD output data buffer. */
4739 gpe_resource = &avc_ctx->res_sfd_output_buffer;
4740 gen9_add_buffer_gpe_surface(ctx,
4746 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
/* Launch the static-frame-detection kernel as a single media object
 * (no walker — one invocation with empty inline data).  Returns
 * VA_STATUS_SUCCESS.
 * NOTE(review): embedded line numbers jump (4754, 4760, 4766, 4769-4770,
 * 4772-4773 ... missing) — the return type, opening brace and some
 * gen9_avc_run_kernel_media_object arguments are not visible here. */
4751 gen9_avc_kernel_sfd(VADriverContextP ctx,
4752 struct encode_state *encode_state,
4753 struct intel_encoder_context *encoder_context)
4755 struct i965_driver_data *i965 = i965_driver_data(ctx);
4756 struct i965_gpe_table *gpe = &i965->gpe_table;
4757 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4758 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4759 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4761 struct i965_gpe_context *gpe_context;
4762 struct gpe_media_object_parameter media_object_param;
4763 struct gpe_media_object_inline_data media_object_inline_data;
4764 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4765 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4767 gpe->context_init(ctx, gpe_context);
4768 gpe->reset_binding_table(ctx, gpe_context);
/* SFD takes no per-call parameters: curbe and surfaces are derived
 * entirely from the encoder state (param == NULL). */
4771 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4774 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4776 gpe->setup_interface_data(ctx, gpe_context);
4778 memset(&media_object_param, 0, sizeof(media_object_param));
4779 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4780 media_object_param.pinline_data = &media_object_inline_data;
4781 media_object_param.inline_size = sizeof(media_object_inline_data);
4783 gen9_avc_run_kernel_media_object(ctx, encoder_context,
4786 &media_object_param);
4788 return VA_STATUS_SUCCESS;
4792 kernel related functions: init/destroy, etc.
/*
 * Initialize the 4x and 2x scaling kernels: set up a GPE context and VFE
 * scoreboard for each, locate the kernel binary in the combined kernel
 * blob, and load it.  The 4x CURBE layout differs between Gen9 (SKL/BXT)
 * and Gen9.5 (KBL/GLK), hence the platform check below.
 * NOTE(review): closing braces and the trailing arguments of
 * intel_avc_get_kernel_header_and_size()/load_kernels() are missing from
 * this excerpt — verify against the full file.
 */
4795 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4796 struct generic_encoder_context *generic_context,
4797 struct gen_avc_scaling_context *kernel_context)
4799 struct i965_driver_data *i965 = i965_driver_data(ctx);
4800 struct i965_gpe_table *gpe = &i965->gpe_table;
4801 struct i965_gpe_context *gpe_context = NULL;
4802 struct encoder_kernel_parameter kernel_param ;
4803 struct encoder_scoreboard_parameter scoreboard_param;
4804 struct i965_kernel common_kernel;
/* pick the CURBE/inline-data size matching the hardware generation */
4806 if (IS_SKL(i965->intel.device_info) ||
4807 IS_BXT(i965->intel.device_info)) {
4808 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4809 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4810 } else if (IS_KBL(i965->intel.device_info) ||
4811 IS_GLK(i965->intel.device_info)) {
4812 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4813 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4816 /* 4x scaling kernel*/
4817 kernel_param.sampler_size = 0;
/* scoreboard config is shared by both scaling kernels */
4819 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4820 scoreboard_param.mask = 0xFF;
4821 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4822 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4823 scoreboard_param.walkpat_flag = 0;
4825 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4826 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4827 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4829 memset(&common_kernel, 0, sizeof(common_kernel));
/* look up the 4x scaling kernel inside the combined kernel binary */
4831 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4832 generic_context->enc_kernel_size,
4833 INTEL_GENERIC_ENC_SCALING4X,
4837 gpe->load_kernels(ctx,
4842 /*2x scaling kernel*/
4843 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4844 kernel_param.inline_data_size = 0;
4845 kernel_param.sampler_size = 0;
4847 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4848 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4849 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4851 memset(&common_kernel, 0, sizeof(common_kernel));
/* look up and load the 2x scaling kernel */
4853 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4854 generic_context->enc_kernel_size,
4855 INTEL_GENERIC_ENC_SCALING2X,
4859 gpe->load_kernels(ctx,
/*
 * Initialize the motion-estimation (ME) kernels: one GPE context per ME
 * level (loop count 2 — presumably the 4x and 16x HME levels; confirm
 * against the gen_avc_me_context definition), each loading the same ME
 * kernel binary.
 * NOTE(review): the declaration of 'i' and several trailing call
 * arguments/braces are missing from this excerpt.
 */
4867 gen9_avc_kernel_init_me(VADriverContextP ctx,
4868 struct generic_encoder_context *generic_context,
4869 struct gen_avc_me_context *kernel_context)
4871 struct i965_driver_data *i965 = i965_driver_data(ctx);
4872 struct i965_gpe_table *gpe = &i965->gpe_table;
4873 struct i965_gpe_context *gpe_context = NULL;
4874 struct encoder_kernel_parameter kernel_param ;
4875 struct encoder_scoreboard_parameter scoreboard_param;
4876 struct i965_kernel common_kernel;
4879 kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4880 kernel_param.inline_data_size = 0;
4881 kernel_param.sampler_size = 0;
4883 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4884 scoreboard_param.mask = 0xFF;
4885 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4886 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4887 scoreboard_param.walkpat_flag = 0;
4889 for (i = 0; i < 2; i++) {
4890 gpe_context = &kernel_context->gpe_contexts[i];
4891 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4892 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4894 memset(&common_kernel, 0, sizeof(common_kernel));
/* locate the ME kernel in the combined kernel blob, then load it */
4896 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4897 generic_context->enc_kernel_size,
4898 INTEL_GENERIC_ENC_ME,
4902 gpe->load_kernels(ctx,
/*
 * Initialize the MBEnc kernels: one GPE context per MBEnc variant
 * (NUM_GEN9_AVC_KERNEL_MBENC of them), each loading the MBEnc kernel.
 * The CURBE layout differs between Gen9 (SKL/BXT) and Gen9.5 (KBL/GLK);
 * the assert guards against running on an unhandled platform.
 * NOTE(review): the declaration of 'i' and several trailing call
 * arguments/braces are missing from this excerpt.
 */
4911 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4912 struct generic_encoder_context *generic_context,
4913 struct gen_avc_mbenc_context *kernel_context)
4915 struct i965_driver_data *i965 = i965_driver_data(ctx);
4916 struct i965_gpe_table *gpe = &i965->gpe_table;
4917 struct i965_gpe_context *gpe_context = NULL;
4918 struct encoder_kernel_parameter kernel_param ;
4919 struct encoder_scoreboard_parameter scoreboard_param;
4920 struct i965_kernel common_kernel;
4922 unsigned int curbe_size = 0;
/* platform-dependent CURBE size; stays 0 on unknown hardware */
4924 if (IS_SKL(i965->intel.device_info) ||
4925 IS_BXT(i965->intel.device_info)) {
4926 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4927 } else if (IS_KBL(i965->intel.device_info) ||
4928 IS_GLK(i965->intel.device_info)) {
4929 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
4932 assert(curbe_size > 0);
4933 kernel_param.curbe_size = curbe_size;
4934 kernel_param.inline_data_size = 0;
4935 kernel_param.sampler_size = 0;
4937 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4938 scoreboard_param.mask = 0xFF;
4939 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4940 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4941 scoreboard_param.walkpat_flag = 0;
4943 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4944 gpe_context = &kernel_context->gpe_contexts[i];
4945 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4946 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4948 memset(&common_kernel, 0, sizeof(common_kernel));
/* locate the MBEnc kernel in the combined kernel blob, then load it */
4950 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4951 generic_context->enc_kernel_size,
4952 INTEL_GENERIC_ENC_MBENC,
4956 gpe->load_kernels(ctx,
/*
 * Initialize the bit-rate-control (BRC) kernels: one GPE context per BRC
 * kernel (NUM_GEN9_AVC_KERNEL_BRC), each with its own CURBE size taken
 * from the brc_curbe_size[] table.  Entry 3 reuses the MBEnc CURBE, whose
 * layout depends on Gen9 (SKL/BXT) vs Gen9.5 (KBL/GLK).
 * NOTE(review): the declaration of 'i' and several trailing call
 * arguments/braces are missing from this excerpt.
 */
4965 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4966 struct generic_encoder_context *generic_context,
4967 struct gen_avc_brc_context *kernel_context)
4969 struct i965_driver_data *i965 = i965_driver_data(ctx);
4970 struct i965_gpe_table *gpe = &i965->gpe_table;
4971 struct i965_gpe_context *gpe_context = NULL;
4972 struct encoder_kernel_parameter kernel_param ;
4973 struct encoder_scoreboard_parameter scoreboard_param;
4974 struct i965_kernel common_kernel;
/* per-kernel CURBE sizes, indexed in BRC-kernel order */
4977 const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4978 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4979 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4980 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4981 ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
4983 (sizeof(gen9_avc_mb_brc_curbe_data))
4986 kernel_param.inline_data_size = 0;
4987 kernel_param.sampler_size = 0;
4989 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4990 scoreboard_param.mask = 0xFF;
4991 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4992 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4993 scoreboard_param.walkpat_flag = 0;
4995 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4996 kernel_param.curbe_size = brc_curbe_size[i];
4997 gpe_context = &kernel_context->gpe_contexts[i];
4998 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4999 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5001 memset(&common_kernel, 0, sizeof(common_kernel));
/* locate the BRC kernel in the combined kernel blob, then load it */
5003 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5004 generic_context->enc_kernel_size,
5005 INTEL_GENERIC_ENC_BRC,
5009 gpe->load_kernels(ctx,
/*
 * Initialize the weighted-prediction (WP) kernel: a single GPE context
 * (kernel_context->gpe_contexts is a scalar here, not an array), loading
 * the WP kernel binary.
 * NOTE(review): trailing call arguments/braces are missing from this
 * excerpt — verify against the full file.
 */
5018 gen9_avc_kernel_init_wp(VADriverContextP ctx,
5019 struct generic_encoder_context *generic_context,
5020 struct gen_avc_wp_context *kernel_context)
5022 struct i965_driver_data *i965 = i965_driver_data(ctx);
5023 struct i965_gpe_table *gpe = &i965->gpe_table;
5024 struct i965_gpe_context *gpe_context = NULL;
5025 struct encoder_kernel_parameter kernel_param ;
5026 struct encoder_scoreboard_parameter scoreboard_param;
5027 struct i965_kernel common_kernel;
5029 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
5030 kernel_param.inline_data_size = 0;
5031 kernel_param.sampler_size = 0;
5033 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5034 scoreboard_param.mask = 0xFF;
5035 scoreboard_param.enable = generic_context->use_hw_scoreboard;
5036 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5037 scoreboard_param.walkpat_flag = 0;
5039 gpe_context = &kernel_context->gpe_contexts;
5040 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5041 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5043 memset(&common_kernel, 0, sizeof(common_kernel));
/* locate the WP kernel in the combined kernel blob, then load it */
5045 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5046 generic_context->enc_kernel_size,
5047 INTEL_GENERIC_ENC_WP,
5051 gpe->load_kernels(ctx,
/*
 * Initialize the static-frame-detection (SFD) kernel: a single GPE
 * context (kernel_context->gpe_contexts is a scalar), loading the SFD
 * kernel binary.  Mirrors gen9_avc_kernel_init_wp().
 * NOTE(review): trailing call arguments/braces are missing from this
 * excerpt — verify against the full file.
 */
5059 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5060 struct generic_encoder_context *generic_context,
5061 struct gen_avc_sfd_context *kernel_context)
5063 struct i965_driver_data *i965 = i965_driver_data(ctx);
5064 struct i965_gpe_table *gpe = &i965->gpe_table;
5065 struct i965_gpe_context *gpe_context = NULL;
5066 struct encoder_kernel_parameter kernel_param ;
5067 struct encoder_scoreboard_parameter scoreboard_param;
5068 struct i965_kernel common_kernel;
5070 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5071 kernel_param.inline_data_size = 0;
5072 kernel_param.sampler_size = 0;
5074 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5075 scoreboard_param.mask = 0xFF;
5076 scoreboard_param.enable = generic_context->use_hw_scoreboard;
5077 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5078 scoreboard_param.walkpat_flag = 0;
5080 gpe_context = &kernel_context->gpe_contexts;
5081 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5082 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5084 memset(&common_kernel, 0, sizeof(common_kernel));
/* locate the SFD kernel in the combined kernel blob, then load it */
5086 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5087 generic_context->enc_kernel_size,
5088 INTEL_GENERIC_ENC_SFD,
5092 gpe->load_kernels(ctx,
/*
 * Tear down everything created by the kernel-init functions above: free
 * the shared encoder resources, then destroy every GPE context for the
 * scaling, BRC, ME and MBEnc kernel groups plus the single WP and SFD
 * contexts.
 * NOTE(review): the declaration of 'i' is missing from this excerpt.
 */
5100 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5103 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5104 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5105 struct i965_gpe_table *gpe = &i965->gpe_table;
5109 gen9_avc_free_resources(vme_context);
5111 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5112 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5114 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5115 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5117 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5118 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5120 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5121 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
5123 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5125 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
/*
 * Refresh per-frame encoder state from the submitted VA-API sequence,
 * picture and slice parameter buffers: derive the frame type, (re)seed
 * the BRC state when needed, compute frame and 4x/16x/32x downscaled
 * dimensions, and gate each HME level on preset support and minimum size.
 * NOTE(review): many interior lines (braces, else branches, 'profile' and
 * 'slice_index' declarations) are missing from this excerpt — verify
 * control flow against the full file.
 */
5133 gen9_avc_update_parameters(VADriverContextP ctx,
5135 struct encode_state *encode_state,
5136 struct intel_encoder_context *encoder_context)
5138 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5139 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5140 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5141 VAEncSequenceParameterBufferH264 *seq_param;
5142 VAEncSliceParameterBufferH264 *slice_param;
5143 int i, j, slice_index;
5144 unsigned int preset = generic_state->preset;
5146 /* seq/pic/slice parameter setting */
5147 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5148 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5150 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5151 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* flatten all slice parameter buffers into avc_state->slice_param[] */
5153 avc_state->slice_num = 0;
5155 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
5156 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5157 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5158 avc_state->slice_param[slice_index] = slice_param;
5161 avc_state->slice_num++;
5165 /* how many slices are supported by now? one slice or multiple slices, but row slices, not slice groups */
5166 seq_param = avc_state->seq_param;
5167 slice_param = avc_state->slice_param[0];
/* derive the frame type from the first slice's slice_type */
5169 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5171 if (slice_param->slice_type == SLICE_TYPE_I ||
5172 slice_param->slice_type == SLICE_TYPE_SI)
5173 generic_state->frame_type = SLICE_TYPE_I;
5174 else if (slice_param->slice_type == SLICE_TYPE_P)
5175 generic_state->frame_type = SLICE_TYPE_P;
5176 else if (slice_param->slice_type == SLICE_TYPE_B)
5177 generic_state->frame_type = SLICE_TYPE_B;
5178 if (profile == VAProfileH264High)
5179 avc_state->transform_8x8_mode_enable = 0;//workaround for High profile: force-disable pic_param->pic_fields.bits.transform_8x8_mode_flag
5181 avc_state->transform_8x8_mode_enable = 0;
/* (re)seed the BRC state on first init or on a requested reset */
5184 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5185 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5186 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5187 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5188 generic_state->frames_per_100s = 3000; /* 30fps */
5191 generic_state->gop_size = seq_param->intra_period;
5192 generic_state->gop_ref_distance = seq_param->ip_period;
5194 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: max == min == target */
5195 generic_state->max_bit_rate = generic_state->target_bit_rate;
5196 generic_state->min_bit_rate = generic_state->target_bit_rate;
5199 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
5200 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
/* map the VA quality level to a preset/kernel mode */
5203 generic_state->preset = encoder_context->quality_level;
5204 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
5205 generic_state->preset = INTEL_PRESET_RT_SPEED;
5207 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5209 if (!generic_state->brc_inited) {
5210 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5211 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5212 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5213 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5217 generic_state->curr_pak_pass = 0;
5218 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* BRC only runs for CBR/VBR */
5220 if (generic_state->internal_rate_mode == VA_RC_CBR ||
5221 generic_state->internal_rate_mode == VA_RC_VBR)
5222 generic_state->brc_enabled = 1;
5224 generic_state->brc_enabled = 0;
/* BRC requires all rate-control inputs to be non-zero */
5226 if (generic_state->brc_enabled &&
5227 (!generic_state->init_vbv_buffer_fullness_in_bit ||
5228 !generic_state->vbv_buffer_size_in_bit ||
5229 !generic_state->max_bit_rate ||
5230 !generic_state->target_bit_rate ||
5231 !generic_state->frames_per_100s)) {
5232 WARN_ONCE("Rate control parameter is required for BRC\n");
5233 generic_state->brc_enabled = 0;
5236 if (!generic_state->brc_enabled) {
/* no BRC: clear rate-control state and do a single PAK pass */
5237 generic_state->target_bit_rate = 0;
5238 generic_state->max_bit_rate = 0;
5239 generic_state->min_bit_rate = 0;
5240 generic_state->init_vbv_buffer_fullness_in_bit = 0;
5241 generic_state->vbv_buffer_size_in_bit = 0;
5242 generic_state->num_pak_passes = 1;
5244 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* frame dimensions in MBs/pixels and the HME downscaled sizes */
5248 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5249 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5250 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5251 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5253 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
5254 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
5255 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
5256 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
5258 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
5259 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
5260 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
5261 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
5263 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
5264 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
5265 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
5266 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
/* enable each HME level only where the preset supports it */
5268 if (generic_state->hme_supported) {
5269 generic_state->hme_enabled = 1;
5271 generic_state->hme_enabled = 0;
5274 if (generic_state->b16xme_supported) {
5275 generic_state->b16xme_enabled = 1;
5277 generic_state->b16xme_enabled = 0;
5280 if (generic_state->b32xme_supported) {
5281 generic_state->b32xme_enabled = 1;
5283 generic_state->b32xme_enabled = 0;
5285 /* disable HME/16xME if the size is too small */
5286 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5287 generic_state->b32xme_supported = 0;
5288 generic_state->b32xme_enabled = 0;
5289 generic_state->b16xme_supported = 0;
5290 generic_state->b16xme_enabled = 0;
5291 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5292 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5294 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5295 generic_state->b32xme_supported = 0;
5296 generic_state->b32xme_enabled = 0;
5297 generic_state->b16xme_supported = 0;
5298 generic_state->b16xme_enabled = 0;
5299 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5300 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5303 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5304 generic_state->b32xme_supported = 0;
5305 generic_state->b32xme_enabled = 0;
5306 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5307 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5309 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5310 generic_state->b32xme_supported = 0;
5311 generic_state->b32xme_enabled = 0;
5312 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5313 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5316 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5317 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5318 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5320 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5321 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5322 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and derive the per-frame encoding controls: map the VA rate
 * control mode to the internal mode, set BRC-related flags and PAK pass
 * counts, decide CAF/flatness-check/MB-status usage, check the slice
 * layout, compute the B-frame bi-weight, and pick the inter rounding
 * value by frame type.  Returns VA_STATUS_SUCCESS.
 * NOTE(review): many interior lines (case labels, braces, else branches,
 * the declaration of 'i') are missing from this excerpt — verify control
 * flow against the full file.
 */
5328 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5329 struct encode_state *encode_state,
5330 struct intel_encoder_context *encoder_context)
5332 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5333 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5334 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5335 unsigned int rate_control_mode = encoder_context->rate_control_mode;
5336 unsigned int preset = generic_state->preset;
5337 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5338 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5340 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5342 generic_state->avbr_curracy = 30;
5343 generic_state->avbr_convergence = 150;
/* map the VA RC mode (low 7 bits) to the internal rate mode */
5345 switch (rate_control_mode & 0x7f) {
5347 generic_state->internal_rate_mode = VA_RC_CBR;
5351 generic_state->internal_rate_mode = VA_RC_VBR;
5356 generic_state->internal_rate_mode = VA_RC_CQP;
5360 if (rate_control_mode != VA_RC_NONE &&
5361 rate_control_mode != VA_RC_CQP) {
5362 generic_state->brc_enabled = 1;
5363 generic_state->brc_distortion_buffer_supported = 1;
5364 generic_state->brc_constant_buffer_supported = 1;
5365 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5368 /*check brc parameter*/
5369 if (generic_state->brc_enabled) {
5370 avc_state->mb_qp_data_enable = 0;
5373 /*set the brc init and reset accordingly*/
5374 if (generic_state->brc_need_reset &&
5375 (generic_state->brc_distortion_buffer_supported == 0 ||
5376 rate_control_mode == VA_RC_CQP)) {
5377 generic_state->brc_need_reset = 0;// not supported by CQP
5380 if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
5381 avc_state->sfd_enable = 0;
/* clamp the BRC sliding-window length to at most 60 frames */
5384 if (generic_state->frames_per_window_size == 0) {
5385 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5386 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
5387 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5390 if (generic_state->brc_enabled) {
5391 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5392 if (avc_state->min_max_qp_enable) {
5393 generic_state->num_pak_passes = 1;
5395 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5396 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5398 generic_state->num_pak_passes = 1;// CQP only one pass
/* I-frame distortion via MBEnc needs BRC + distortion buffer support */
5401 avc_state->mbenc_i_frame_dist_in_use = 0;
5402 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5404 /*ROI must enable mbbrc.*/
/* continual adaptive fractional (CAF) search, gated by preset tables */
5407 if (avc_state->caf_supported) {
5408 switch (generic_state->frame_type) {
5412 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5415 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5419 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
5420 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5421 avc_state->caf_enable = 0;
5425 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
5427 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5428 if (avc_state->flatness_check_supported) {
5429 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5431 avc_state->flatness_check_enable = 0;
5434 /* check mb_status_supported/enable */
5435 if (avc_state->adaptive_transform_decision_enable) {
5436 avc_state->mb_status_enable = 1;
5438 avc_state->mb_status_enable = 0;
5440 /*slice check: all the slices use the same slice height except the last slice*/
5441 avc_state->arbitrary_num_mbs_in_slice = 0;
5442 for (i = 0; i < avc_state->slice_num; i++) {
5443 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
5444 avc_state->arbitrary_num_mbs_in_slice = 1;
5445 avc_state->slice_height = 1; /* slice height will be ignored by the kernel, so set it to a default value here */
5447 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
/* no HME of any level on I frames */
5451 if (generic_state->frame_type == SLICE_TYPE_I) {
5452 generic_state->hme_enabled = 0;
5453 generic_state->b16xme_enabled = 0;
5454 generic_state->b32xme_enabled = 0;
5457 if (generic_state->frame_type == SLICE_TYPE_B) {
5458 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
5459 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
5462 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5463 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5464 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5466 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
5467 avc_state->tq_enable = 1;
5468 avc_state->tq_rounding = 6;
5469 if (generic_state->brc_enabled) {
5470 generic_state->mb_brc_enabled = 1;
5474 //check the inter rounding
5475 avc_state->rounding_value = 0;
5476 avc_state->rounding_inter_p = 255;//default
5477 avc_state->rounding_inter_b = 255; //default
5478 avc_state->rounding_inter_b_ref = 255; //default
/* pick the inter rounding value: adaptive (QP-indexed) when allowed,
 * otherwise the preset table, unless an explicit override was set */
5480 if (generic_state->frame_type == SLICE_TYPE_P) {
5481 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
5482 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
5483 if (generic_state->gop_ref_distance == 1)
5484 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5486 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5488 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5492 avc_state->rounding_value = avc_state->rounding_inter_p;
5494 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5495 if (pic_param->pic_fields.bits.reference_pic_flag) {
5496 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5497 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5499 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5501 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
5502 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5503 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5505 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5507 avc_state->rounding_value = avc_state->rounding_inter_b;
5511 return VA_STATUS_SUCCESS;
5515 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
5516 struct encode_state *encode_state,
5517 struct intel_encoder_context *encoder_context)
5520 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5521 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5522 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5523 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5524 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5526 struct object_surface *obj_surface;
5527 struct object_buffer *obj_buffer;
5528 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5529 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
5530 struct i965_coded_buffer_segment *coded_buffer_segment;
5532 struct gen9_surface_avc *avc_priv_surface;
5534 struct avc_surface_param surface_param;
5536 unsigned char * pdata;
5538 /* Setup current reconstruct frame */
5539 obj_surface = encode_state->reconstructed_object;
5540 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5542 if (va_status != VA_STATUS_SUCCESS)
5545 memset(&surface_param, 0, sizeof(surface_param));
5546 surface_param.frame_width = generic_state->frame_width_in_pixel;
5547 surface_param.frame_height = generic_state->frame_height_in_pixel;
5548 va_status = gen9_avc_init_check_surfaces(ctx,
5552 if (va_status != VA_STATUS_SUCCESS)
5555 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
5556 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
5557 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
5558 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
5559 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
5560 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
5561 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
5562 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
5563 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5564 avc_priv_surface->frame_store_id = 0;
5565 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
5566 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
5567 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
5568 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
5569 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
5571 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
5572 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
5574 /* input YUV surface*/
5575 obj_surface = encode_state->input_yuv_object;
    /* (Re)allocate the raw input as an NV12 surface and wrap it as a 2D GPE
     * resource so the VME kernels can read it. */
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    if (va_status != VA_STATUS_SUCCESS)
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        /* Drop the wrappers left over from the previous frame before re-binding. */
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        /* Each reference slot owns two direct-MV buffers (indices 2i and 2i+1). */
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
            if (va_status != VA_STATUS_SUCCESS)
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            /* Record the field POCs; the bottom-field entry is top POC + 1 here. */
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;

    /* Encoded bitstream ?*/
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    /* Leave the trailing 0x1000 bytes of the coded buffer out of the PAK window. */
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /* The status buffer lives in the same BO as the coded data. */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);

    //frame id, it is the ref pic id in the reference_objects list.
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        /* The per-slice override takes precedence over the picture default. */
        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;

    /* Reject reference counts that would overflow the fixed-size index arrays. */
    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map every entry of RefPicList0 to its index in reference_objects[]. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];
                obj_surface->base.id == va_pic->picture_id) {
                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

    /* Same mapping for RefPicList1 (used by B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];
                obj_surface->base.id == va_pic->picture_id) {
                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

    return VA_STATUS_SUCCESS;
5712 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5713 struct encode_state *encode_state,
5714 struct intel_encoder_context *encoder_context)
5716 return VA_STATUS_SUCCESS;
5720 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5721 struct encode_state *encode_state,
5722 struct intel_encoder_context *encoder_context)
5725 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5726 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5727 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5729 /*set this flag when all kernel is finished*/
5730 if (generic_state->brc_enabled) {
5731 generic_state->brc_inited = 1;
5732 generic_state->brc_need_reset = 0;
5733 avc_state->mbenc_curbe_set_in_brc_update = 0;
5735 return VA_STATUS_SUCCESS;
/* Submit the full VME (ENC) kernel sequence for the current frame:
 * BRC init/reset -> scaling -> HME -> SFD -> BRC updates -> weighted
 * prediction -> final MbEnc. */
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);

    /* Downscale the source once per HME level the platform supports. */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);

    /* Run HME from the coarsest enabled level down to 4x. */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);

    /*call SFD kernel after HME in same command buffer*/
    /* SFD (static frame detection) only runs with HME and without per-MB SFD. */
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* Optional I-frame distortion pass of MbEnc feeds the BRC frame update. */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);

    /*weight prediction,disable by now */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app

    /* Final MbEnc pass produces the PAK object / MV data consumed by PAK. */
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
/* Top-level VME (ENC) pipeline for one frame: refresh and validate the
 * encode parameters, allocate resources, then prepare, init, run and
 * finalize the GPE kernels.  Any failing step aborts the pipeline. */
gen9_avc_vme_pipeline(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
    gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);

    va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)

    va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)

    va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)

    va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)

    va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
    if (va_status != VA_STATUS_SUCCESS)

    gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
/* Destroy the VME context: release the GPU kernel resources first, then
 * free the host-side state blocks hanging off the context. */
gen9_avc_vme_context_destroy(void * context)
    struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
    struct generic_encoder_context *generic_ctx;
    struct i965_avc_encoder_context *avc_ctx;
    struct generic_enc_codec_state *generic_state;
    struct avc_enc_state *avc_state;

    generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    /* Kernel/GPE resources must go before the state they reference. */
    gen9_avc_kernel_destroy(vme_context);

    free(generic_state);
/* One-time kernel setup for the gen9 AVC encoder: initialize the GPE
 * context of every kernel (scaling, BRC, ME, MbEnc, WP, SFD) and register
 * the per-kernel curbe-set and surface-send callbacks; the 4x scaling
 * curbe callback is then specialized per platform generation. */
gen9_avc_kernel_init(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
    gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
    gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
    gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
    gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
    gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);

    /* Curbe (constant URB entry) setup callbacks, one per kernel. */
    generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
    generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
    generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
    generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
    generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
    generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
    generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
    generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
    generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;

    /* Binding-table / surface setup callbacks, one per kernel. */
    generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
    generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
    generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
    generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
    generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
    generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
    generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
    generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;

    /* KBL/GLK (gen9.5) use a different 4x scaling curbe layout. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5938 PAK pipeline related function
5941 intel_avc_enc_slice_type_fixup(int slice_type);
/* Emit MFX_PIPE_MODE_SELECT for AVC encode: long-format mode, stream-out
 * enabled on every PAK pass except the last, and pre/post deblocking
 * output selected from whichever buffer is currently allocated. */
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) | /* Non-VDEnc mode is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
                  (0 << 7) | /* Scaled surface enable */
                  (0 << 6) | /* Frame statistics stream out enable */
                  (0 << 5) | /* not in stitch mode */
                  (1 << 4) | /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7) | /* expand NOA bus flag */
                  (0 << 6) | /* disable slice-level clock gating */
                  (0 << 5) | /* disable clock gating for NOA */
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE for one tiled NV12 surface; the id selects which
 * MFX surface slot the state applies to. */
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) | /* pitch */
                  (0 << 2) | /* must be 0 for interleave U/V */
                  (1 << 1) | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
    /* NOTE(review): DW5 reuses y_cb_offset — presumably the Cr offset equals
     * the Cb offset for interleaved NV12; confirm against the MFX PRM. */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset)); /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE: deblocking outputs, source picture, PAK
 * status buffers, row-store scratch buffers and the reference picture
 * address list (DW layout is fixed by the hardware command). */
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE: bind the reconstructed surface's
 * indirect MV data buffer and the PAK-BSE output (coded bitstream) window. */
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* 32 DWs of MV data per macroblock. */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store scratch
 * buffer is needed for encode; the MPR and bitplane buffers are ignored. */
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE: the direct-MV buffers of the reference
 * frames and the current frame, followed by the POC list used for
 * direct/temporal MV scaling. */
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);

    /* DW33: memory attributes for the reference DMV buffers. */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: the 32 reference-field POCs, then the current frame's. */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command: upload a quantizer matrix of the given
 * type as 16 DWs, zero-padding when fewer than 16 DWs are supplied. */
gen9_mfc_qm_state(VADriverContextP ctx,
                  const unsigned int *qm,
                  struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int qm_buffer[16];

    assert(qm_length <= 16);
    assert(sizeof(*qm) == 4);
    /* Stage into a fixed 16-DW buffer so the command payload is always full. */
    memset(qm_buffer, 0, 16 * 4);
    memcpy(qm_buffer, qm, qm_length * 4);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
/* Program the four AVC quantizer matrices (4x4/8x8, intra/inter): flat
 * matrices when no scaling lists are signalled, otherwise taken from the
 * app-supplied IQ matrix buffer. */
gen9_mfc_avc_qm_state(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;

    const unsigned int *qm_4x4_intra;
    const unsigned int *qm_4x4_inter;
    const unsigned int *qm_8x8_intra;
    const unsigned int *qm_8x8_inter;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
        /* 4x4 lists 0 and 3, 8x8 lists 0 and 1 — presumably the intra/inter
         * luma lists per the H.264 scaling-list ordering; verify vs. spec. */
        qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
        qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
        qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
        qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];

    /* 4x4 uploads carry 12 DWs, 8x8 uploads carry the full 16 DWs. */
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emit one MFX_FQM_STATE command: upload a forward-quantizer matrix of the
 * given type as 32 DWs, zero-padding when fewer than 32 DWs are supplied. */
gen9_mfc_fqm_state(VADriverContextP ctx,
                   const unsigned int *fqm,
                   struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int fqm_buffer[32];

    assert(fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    /* Stage into a fixed 32-DW buffer so the command payload is always full. */
    memset(fqm_buffer, 0, 32 * 4);
    memcpy(fqm_buffer, fqm, fqm_length * 4);

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
    ADVANCE_BCS_BATCH(batch);
/*
 * Build a forward-quantizer matrix from a len x len scaling list.
 *
 * Writes the transposed reciprocal of each quantizer entry in 1.16 fixed
 * point: fqm[r][c] = 65536 / qm[c][r].  Every qm entry must be non-zero
 * (asserted).  Note that an entry of 1 yields 65536, which wraps to 0 in
 * the uint16_t destination.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q);
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
/* Program the AVC forward-quantizer matrices: flat when no scaling lists
 * are signalled, otherwise derived from the app's IQ matrix via
 * gen9_mfc_fill_fqm(). */
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* Three 4x4 lists (16 uint16 entries each) packed into one upload. */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6341 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6342 struct intel_encoder_context *encoder_context,
6343 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6344 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6345 int slice_header_indicator,
6346 struct intel_batchbuffer *batch)
6348 if (data_bits_in_last_dw == 0)
6349 data_bits_in_last_dw = 32;
6351 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6353 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6354 OUT_BCS_BATCH(batch,
6355 (0 << 16) | /* always start at offset 0 */
6356 (slice_header_indicator << 14) |
6357 (data_bits_in_last_dw << 8) |
6358 (skip_emul_byte_count << 4) |
6359 (!!emulation_flag << 3) |
6360 ((!!is_last_header) << 2) |
6361 ((!!is_end_of_slice) << 1) |
6362 (0 << 0)); /* check this flag */
6363 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6365 ADVANCE_BCS_BATCH(batch);
/* Scan slice 0's packed raw-data buffers and, when an Access Unit
 * Delimiter NAL is found, insert it into the bitstream ahead of every
 * other header. */
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        /* Skip past the start code to reach the NAL unit type byte. */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       !param->has_emulation_bytes,
/* Insert a slice's packed data into the bitstream: first all raw packed
 * buffers (skipping AUD and slice-header types), then the slice header
 * itself — taken from the app when supplied, otherwise generated by the
 * driver via build_avc_slice_header(). */
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct intel_batchbuffer *batch)
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* An index of 0 means the app did not supply a packed slice header. */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)

        /* as the slice header is still required, the last header flag is set to zero. */
        gen9_mfc_avc_insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
        gen9_mfc_avc_insert_object(ctx,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5, /* first 5 bytes are start code + nal unit type */
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one. */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,
/*
 * Insert the packed bitstream headers for one slice into the PAK batch.
 *
 * For the first slice of a frame (slice_index == 0) this emits, in order:
 * an AUD (if present), then the packed SPS, PPS and SEI header buffers
 * that the application attached via encode_state->packed_header_data[].
 * Every slice then gets its slice header emitted last through
 * gen9_mfc_avc_insert_slice_packed_data().
 *
 * NOTE: the function name "inset" (rather than "insert") is historical;
 * it is kept as-is because callers elsewhere in this file use it.
 */
6520 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6521 struct encode_state *encode_state,
6522 struct intel_encoder_context *encoder_context,
6523 VAEncSliceParameterBufferH264 *slice_param,
6525 struct intel_batchbuffer *batch)
6527 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6528 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6529 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6530 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6531 unsigned int skip_emul_byte_cnt;
/* Sequence-level headers (AUD/SPS/PPS/SEI) only go out with the first slice. */
6533 if (slice_index == 0) {
6535 /* if AUD exist and insert it firstly */
6536 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
/* SPS: emit the application-supplied packed header, if any. */
6538 if (encode_state->packed_header_data[idx]) {
6539 VAEncPackedHeaderParameterBuffer *param = NULL;
6540 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6541 unsigned int length_in_bits;
6543 assert(encode_state->packed_header_param[idx]);
6544 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6545 length_in_bits = param->bit_length;
/* Bytes (start code + NAL type) that must bypass emulation-prevention insertion. */
6547 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6548 gen9_mfc_avc_insert_object(ctx,
6551 ALIGN(length_in_bits, 32) >> 5,
6552 length_in_bits & 0x1f,
/* Let the hardware add emulation bytes only if the app did not. */
6556 !param->has_emulation_bytes,
/* PPS: same handling as SPS. */
6561 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6563 if (encode_state->packed_header_data[idx]) {
6564 VAEncPackedHeaderParameterBuffer *param = NULL;
6565 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6566 unsigned int length_in_bits;
6568 assert(encode_state->packed_header_param[idx]);
6569 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6570 length_in_bits = param->bit_length;
6572 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6574 gen9_mfc_avc_insert_object(ctx,
6577 ALIGN(length_in_bits, 32) >> 5,
6578 length_in_bits & 0x1f,
6582 !param->has_emulation_bytes,
/* SEI: emitted when supplied; rate-control mode may also require one below. */
6587 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6589 if (encode_state->packed_header_data[idx]) {
6590 VAEncPackedHeaderParameterBuffer *param = NULL;
6591 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6592 unsigned int length_in_bits;
6594 assert(encode_state->packed_header_param[idx]);
6595 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6596 length_in_bits = param->bit_length;
6598 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6599 gen9_mfc_avc_insert_object(ctx,
6602 ALIGN(length_in_bits, 32) >> 5,
6603 length_in_bits & 0x1f,
6607 !param->has_emulation_bytes,
/* No app-supplied SEI: CBR streams still need bitrate signalling here
 * (NOTE(review): the CBR branch body is handled in the elided lines). */
6610 } else if (internal_rate_mode == VA_RC_CBR) {
/* Finally the slice header itself — always last, for every slice. */
6615 gen9_mfc_avc_insert_slice_packed_data(ctx,
/*
 * Program the MFX_AVC_SLICE_STATE command for one slice.
 *
 * Derives the per-slice hardware state from the VA-API picture/slice
 * parameters: slice geometry (start MB and next-slice start MB), weighted
 * prediction denominators, reference list sizes, QP clamping for BRC
 * multi-pass, RC panic, and rounding control.  The command is written
 * into `batch`, which for this driver is the second-level slice batch.
 */
6623 gen9_mfc_avc_slice_state(VADriverContextP ctx,
6624 struct encode_state *encode_state,
6625 struct intel_encoder_context *encoder_context,
6626 VAEncPictureParameterBufferH264 *pic_param,
6627 VAEncSliceParameterBufferH264 *slice_param,
6628 VAEncSliceParameterBufferH264 *next_slice_param,
6630 struct intel_batchbuffer *batch)
6631 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6632 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6633 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6634 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6635 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
6636 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
6637 unsigned char correct[6], grow, shrink;
6638 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
6639 int max_qp_n, max_qp_p;
6641 int weighted_pred_idc = 0;
6642 int num_ref_l0 = 0, num_ref_l1 = 0;
6643 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Slice QP as signalled: picture init QP plus the slice delta. */
6644 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6645 unsigned int rc_panic_enable = 0;
6646 unsigned int rate_control_counter_enable = 0;
6647 unsigned int rounding_value = 0;
6648 unsigned int rounding_inter_enable = 0;
/* Convert the linear MB address of the slice start into (x, y) MB coords. */
6650 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6651 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
6653 if (next_slice_param) {
6654 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6655 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice: "next slice" position is the bottom of the frame. */
6657 next_slice_hor_pos = 0;
6658 next_slice_ver_pos = generic_state->frame_height_in_mbs;
/* Weighted prediction / ref list sizes depend on the slice type. */
6661 if (slice_type == SLICE_TYPE_I) {
6662 luma_log2_weight_denom = 0;
6663 chroma_log2_weight_denom = 0;
6664 } else if (slice_type == SLICE_TYPE_P) {
6665 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
6666 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6667 rounding_inter_enable = avc_state->rounding_inter_enable;
6668 rounding_value = avc_state->rounding_value;
/* Slice-level override of the active reference count, if signalled. */
6670 if (slice_param->num_ref_idx_active_override_flag)
6671 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6672 } else if (slice_type == SLICE_TYPE_B) {
6673 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
6674 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6675 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
6676 rounding_inter_enable = avc_state->rounding_inter_enable;
6677 rounding_value = avc_state->rounding_value;
6679 if (slice_param->num_ref_idx_active_override_flag) {
6680 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6681 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
6684 if (weighted_pred_idc == 2) {
6685 /* 8.4.3 - Derivation process for prediction weights (8-279) */
6686 luma_log2_weight_denom = 5;
6687 chroma_log2_weight_denom = 5;
/* RC counters only run on BRC re-pack passes (pass > 0). */
6696 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
/* RC panic only on the final PAK pass, and never with CQP or min/max-QP clamping. */
6697 rc_panic_enable = (avc_state->rc_panic_enable &&
6698 (!avc_state->min_max_qp_enable) &&
6699 (encoder_context->rate_control_mode != VA_RC_CQP) &&
6700 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
6702 for (i = 0; i < 6; i++)
/* MFX_AVC_SLICE_STATE is an 11-dword command. */
6705 BEGIN_BCS_BATCH(batch, 11);
6707 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
6708 OUT_BCS_BATCH(batch, slice_type);
6709 OUT_BCS_BATCH(batch,
6710 (num_ref_l1 << 24) |
6711 (num_ref_l0 << 16) |
6712 (chroma_log2_weight_denom << 8) |
6713 (luma_log2_weight_denom << 0));
6714 OUT_BCS_BATCH(batch,
6715 (weighted_pred_idc << 30) |
6716 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
6717 (slice_param->disable_deblocking_filter_idc << 27) |
6718 (slice_param->cabac_init_idc << 24) |
6720 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
6721 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
/* Slice start position, both as (x, y) MB coords and linear MB address. */
6723 OUT_BCS_BATCH(batch,
6724 slice_ver_pos << 24 |
6725 slice_hor_pos << 16 |
6726 slice_param->macroblock_address);
6727 OUT_BCS_BATCH(batch,
6728 next_slice_ver_pos << 16 |
6729 next_slice_hor_pos);
6731 OUT_BCS_BATCH(batch,
6732 (rate_control_counter_enable << 31) |
6733 (1 << 30) | /* ResetRateControlCounter */
6734 (2 << 28) | /* Loose Rate Control */
6735 (0 << 24) | /* RC Stable Tolerance */
6736 (rc_panic_enable << 23) | /* RC Panic Enable */
6737 (1 << 22) | /* CBP mode */
6738 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
6739 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
6740 (!next_slice_param << 19) | /* Is Last Slice */
6741 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
6742 (1 << 17) | /* HeaderPresentFlag */
6743 (1 << 16) | /* SliceData PresentFlag */
6744 (0 << 15) | /* TailPresentFlag */
6745 (1 << 13) | /* RBSP NAL TYPE */
6746 (1 << 12)); /* CabacZeroWordInsertionEnable */
6748 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
/* QP clamp window for BRC: see max_qp_n/max_qp_p set up above (elided lines). */
6750 OUT_BCS_BATCH(batch,
6751 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
6752 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
6755 OUT_BCS_BATCH(batch,
6756 (rounding_inter_enable << 31) |
6757 (rounding_value << 28) |
6760 (correct[5] << 20) |
6761 (correct[4] << 16) |
6762 (correct[3] << 12) |
6766 OUT_BCS_BATCH(batch, 0);
6768 ADVANCE_BCS_BATCH(batch);
/*
 * Pack one reference picture entry for MFX_AVC_REF_IDX_STATE.
 *
 * Encodes the long-term flag, the hardware frame-store slot and the
 * field parity of `va_pic` into the byte layout the MFX engine expects
 * (bit 6: long-term, bits 5..1: see elided middle term, bit 0: bottom
 * field when not paired with a top field).
 */
6772 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6774 unsigned int is_long_term =
6775 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6776 unsigned int is_top_field =
6777 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6778 unsigned int is_bottom_field =
6779 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6781 return ((is_long_term << 6) |
6783 (frame_store_id << 1) |
/* Bit 0 set only for a bottom field without the top-field flag. */
6784 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Program MFX_AVC_REF_IDX_STATE for the current slice.
 *
 * Builds the L0 reference entry dword for P/B slices and the L1 dword
 * for B slices from RefPicList0/1 (at most 4 references per list are
 * packed, one byte each).  Unused entries keep the 0x80 "invalid"
 * marker, hence the 0x80808080 initial value and padding dwords.
 */
6788 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6789 struct encode_state *encode_state,
6790 struct intel_encoder_context *encoder_context,
6791 VAEncSliceParameterBufferH264 *slice_param,
6792 struct intel_batchbuffer *batch)
6794 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6795 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6796 VAPictureH264 *ref_pic;
6797 int i, slice_type, ref_idx_shift;
6798 unsigned int fwd_ref_entry;
6799 unsigned int bwd_ref_entry;
6801 /* max 4 ref frames are allowed for l0 and l1 */
6802 fwd_ref_entry = 0x80808080;
6803 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6805 if ((slice_type == SLICE_TYPE_P) ||
6806 (slice_type == SLICE_TYPE_B)) {
6807 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6808 ref_pic = &slice_param->RefPicList0[i];
6809 ref_idx_shift = i * 8;
/* Clear the placeholder byte, then insert the packed ref entry. */
6811 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6812 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6816 bwd_ref_entry = 0x80808080;
6817 if (slice_type == SLICE_TYPE_B) {
6818 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6819 ref_pic = &slice_param->RefPicList1[i];
6820 ref_idx_shift = i * 8;
6822 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6823 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
/* L0 command: emitted for both P and B slices. */
6827 if ((slice_type == SLICE_TYPE_P) ||
6828 (slice_type == SLICE_TYPE_B)) {
6829 BEGIN_BCS_BATCH(batch, 10);
6830 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6831 OUT_BCS_BATCH(batch, 0); // L0
6832 OUT_BCS_BATCH(batch, fwd_ref_entry);
/* Remaining 7 dwords stay "invalid" (0x80 per entry). */
6834 for (i = 0; i < 7; i++) {
6835 OUT_BCS_BATCH(batch, 0x80808080);
6838 ADVANCE_BCS_BATCH(batch);
/* L1 command: B slices only. */
6841 if (slice_type == SLICE_TYPE_B) {
6842 BEGIN_BCS_BATCH(batch, 10);
6843 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6844 OUT_BCS_BATCH(batch, 1); //Select L1
6845 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
6846 for (i = 0; i < 7; i++) {
6847 OUT_BCS_BATCH(batch, 0x80808080);
6849 ADVANCE_BCS_BATCH(batch);
/*
 * Program MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction
 * is in use.
 *
 * For P slices with weighted_pred_flag, and for B slices with
 * weighted_bipred_idc == 1, this packs the per-reference luma/chroma
 * weight+offset pairs (6 shorts per ref, 32 refs) into the 98-dword
 * command — one command for list L0, and for B slices a second one
 * for list L1.
 */
6854 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6855 struct encode_state *encode_state,
6856 struct intel_encoder_context *encoder_context,
6857 VAEncPictureParameterBufferH264 *pic_param,
6858 VAEncSliceParameterBufferH264 *slice_param,
6859 struct intel_batchbuffer *batch)
/* 32 refs x (luma w/o + Cb w/o + Cr w/o) = 32 * 6 shorts. */
6862 short weightoffsets[32 * 6];
6864 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6866 if (slice_type == SLICE_TYPE_P &&
6867 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6868 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6869 for (i = 0; i < 32; i++) {
6870 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6871 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6872 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6873 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6874 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6875 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6878 BEGIN_BCS_BATCH(batch, 98);
6879 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6880 OUT_BCS_BATCH(batch, 0);
6881 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6883 ADVANCE_BCS_BATCH(batch);
/* B slice, explicit bipred weights: emit L0 then L1 tables. */
6886 if (slice_type == SLICE_TYPE_B &&
6887 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6888 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6889 for (i = 0; i < 32; i++) {
6890 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6891 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6892 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6893 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6894 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6895 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6898 BEGIN_BCS_BATCH(batch, 98);
6899 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6900 OUT_BCS_BATCH(batch, 0);
6901 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6902 ADVANCE_BCS_BATCH(batch);
/* Second table for reference list L1. */
6904 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6905 for (i = 0; i < 32; i++) {
6906 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6907 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6908 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6909 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6910 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6911 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6914 BEGIN_BCS_BATCH(batch, 98);
6915 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6916 OUT_BCS_BATCH(batch, 1);
6917 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6918 ADVANCE_BCS_BATCH(batch);
/*
 * Emit all PAK commands for one slice.
 *
 * On the first PAK pass the per-slice command sequence (ref idx state,
 * weight/offset state, slice state, packed headers) is recorded once into
 * the second-level slice batch buffer, terminated by MI_BATCH_BUFFER_END,
 * and its offset is remembered in avc_state->slice_batch_offset[].
 * Subsequent BRC re-pack passes replay that recorded batch instead of
 * rebuilding it.  The slice's MB code (produced by the VME/MBEnc stage)
 * is chained in as a second second-level batch.
 */
6923 gen9_mfc_avc_single_slice(VADriverContextP ctx,
6924 struct encode_state *encode_state,
6925 struct intel_encoder_context *encoder_context,
6926 VAEncSliceParameterBufferH264 *slice_param,
6927 VAEncSliceParameterBufferH264 *next_slice_param,
6930 struct i965_driver_data *i965 = i965_driver_data(ctx);
6931 struct i965_gpe_table *gpe = &i965->gpe_table;
6932 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6933 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6934 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6935 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6936 struct intel_batchbuffer *batch = encoder_context->base.batch;
6937 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
6938 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6939 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
6940 struct object_surface *obj_surface;
6941 struct gen9_surface_avc *avc_priv_surface;
6943 unsigned int slice_offset = 0;
/* Pass 0: record the slice commands into the 2nd-level batch. */
6945 if (generic_state->curr_pak_pass == 0) {
6946 slice_offset = intel_batchbuffer_used_size(slice_batch);
6947 avc_state->slice_batch_offset[slice_index] = slice_offset;
6948 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
6949 gen9_mfc_avc_weightoffset_state(ctx,
6955 gen9_mfc_avc_slice_state(ctx,
6962 gen9_mfc_avc_inset_headers(ctx,
/* Terminate the recorded slice batch so it can be replayed standalone. */
6969 BEGIN_BCS_BATCH(slice_batch, 2);
6970 OUT_BCS_BATCH(slice_batch, 0);
6971 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
6972 ADVANCE_BCS_BATCH(slice_batch);
/* Re-pack passes: reuse the offset recorded on pass 0. */
6975 slice_offset = avc_state->slice_batch_offset[slice_index];
6977 /* insert slice as second level.*/
6978 memset(&second_level_batch, 0, sizeof(second_level_batch));
6979 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
6980 second_level_batch.offset = slice_offset;
6981 second_level_batch.bo = slice_batch->buffer;
6982 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
6984 /* insert mb code as second level.*/
6985 obj_surface = encode_state->reconstructed_object;
6986 assert(obj_surface->private_data);
6987 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
6989 memset(&second_level_batch, 0, sizeof(second_level_batch));
6990 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
/* 16 dwords (64 bytes) of MB code per macroblock; start at this slice's first MB. */
6991 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
6992 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
6993 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * Slice-level PAK: walk every slice parameter buffer (and every element
 * inside each buffer) and emit its commands via gen9_mfc_avc_single_slice(),
 * then flush the video pipeline.  The next-slice pointer is threaded
 * through so each slice knows where the following one starts (NULL for
 * the last slice of the frame).
 */
6998 gen9_avc_pak_slice_level(VADriverContextP ctx,
6999 struct encode_state *encode_state,
7000 struct intel_encoder_context *encoder_context)
7002 struct i965_driver_data *i965 = i965_driver_data(ctx);
7003 struct i965_gpe_table *gpe = &i965->gpe_table;
7004 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7005 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
7006 struct intel_batchbuffer *batch = encoder_context->base.batch;
7007 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
7008 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
7010 int slice_index = 0;
7011 int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
7012 int has_tail = 0; /* check it later */
/* Outer loop: one iteration per slice parameter buffer ("slice group"). */
7014 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7015 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7017 if (j == encode_state->num_slice_params_ext - 1)
7018 next_slice_group_param = NULL;
7020 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
/* Inner loop: the elements (individual slices) inside this buffer. */
7022 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7023 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
7024 next_slice_param = slice_param + 1;
7026 next_slice_param = next_slice_group_param;
7028 gen9_mfc_avc_single_slice(ctx,
7046 /* insert a tail if required */
/* Flush with pipeline-cache invalidation after all slices are emitted. */
7049 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
7050 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
7051 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Picture-level PAK: program the frame-wide MFX state.
 *
 * On BRC re-pack passes, a conditional batch-buffer-end first checks the
 * image status mask written by the previous pass, so the re-pack is
 * skipped when the previous pass already met the size target.  Then the
 * pipe mode, surfaces, buffer addresses and the MFX_AVC_IMG_STATE
 * (either the BRC-updated image state buffer or a freshly generated
 * non-BRC one) are emitted, followed by QM/FQM and direct-mode state.
 */
7054 gen9_avc_pak_picture_level(VADriverContextP ctx,
7055 struct encode_state *encode_state,
7056 struct intel_encoder_context *encoder_context)
7058 struct i965_driver_data *i965 = i965_driver_data(ctx);
7059 struct i965_gpe_table *gpe = &i965->gpe_table;
7060 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7061 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7062 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7063 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7064 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
7065 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* BRC re-pack pass: bail out early if the previous pass already succeeded. */
7067 if (generic_state->brc_enabled &&
7068 generic_state->curr_pak_pass) {
7069 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
7070 struct encoder_status_buffer_internal *status_buffer;
7071 status_buffer = &(avc_ctx->status_buffer);
7073 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
7074 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
7075 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
7076 mi_conditional_batch_buffer_end_params.compare_data = 0;
7077 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
7078 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
7081 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
/* Surface binding: index 0 = reconstructed, index 4 = raw input. */
7082 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
7083 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
7084 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
7085 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
7086 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
/* Image state: BRC path replays the per-pass image state prepared by the
 * BRC kernel; otherwise a fresh non-BRC image state batch is generated. */
7088 if (generic_state->brc_enabled) {
7089 memset(&second_level_batch, 0, sizeof(second_level_batch));
7090 if (generic_state->curr_pak_pass == 0) {
7091 second_level_batch.offset = 0;
7093 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
7095 second_level_batch.is_second_level = 1;
7096 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
7097 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7099 /*generate a new image state */
7100 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
7101 memset(&second_level_batch, 0, sizeof(second_level_batch));
7102 second_level_batch.offset = 0;
7103 second_level_batch.is_second_level = 1;
7104 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
7105 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7108 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
7109 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
7110 gen9_mfc_avc_directmode_state(ctx, encoder_context);
/*
 * Capture PAK status after encoding.
 *
 * Flushes the video pipe, then uses MI_STORE_REGISTER_MEM to snapshot the
 * MFX status MMIO registers (bitstream byte count, image status mask/ctrl)
 * into both the internal status buffer (for vaQuerySurfaceStatus-style
 * readback) and the BRC pre-PAK statistics buffer that the BRC kernel
 * consumes on the next pass.  Finishes with another flush so the stores
 * land before any later consumer runs.
 */
7115 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7117 struct i965_driver_data *i965 = i965_driver_data(ctx);
7118 struct i965_gpe_table *gpe = &i965->gpe_table;
7119 struct intel_batchbuffer *batch = encoder_context->base.batch;
7120 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7121 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7122 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7124 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
7125 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
7126 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
7127 struct encoder_status_buffer_internal *status_buffer;
7129 status_buffer = &(avc_ctx->status_buffer);
/* Ensure PAK writes are complete before reading the status registers. */
7131 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7132 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
7134 /* read register and store into status_buffer and pak_statitistic info */
7135 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
7136 mi_store_reg_mem_param.bo = status_buffer->bo;
7137 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
7138 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7139 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7141 mi_store_reg_mem_param.bo = status_buffer->bo;
7142 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
7143 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
7144 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7146 /*update the status in the pak_statistic_surface */
/* DW0: frame bitstream byte count (with headers). */
7147 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7148 mi_store_reg_mem_param.offset = 0;
7149 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7150 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* DW1: byte count excluding headers ("nh" = no headers — TODO confirm). */
7152 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7153 mi_store_reg_mem_param.offset = 4;
7154 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
7155 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* DW2: number of PAK passes executed so far (immediate store). */
7157 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
7158 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7159 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
7160 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
7161 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
/* DW4+pass: per-pass image status control register snapshot. */
7163 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7164 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
7165 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7166 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7168 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7169 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Map the VA rate-control mode requested by the application onto the
 * encoder's internal rate mode (CBR / VBR / CQP; the case labels are in
 * the elided lines of the switch), and apply the default quality level
 * when the application did not choose one.
 */
7175 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7176 struct intel_encoder_context *encoder_context)
7178 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7179 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7180 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Mask off modifier bits; only the base RC mode selects the branch. */
7182 switch (rate_control_mode & 0x7f) {
7184 generic_state->internal_rate_mode = VA_RC_CBR;
7188 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7193 generic_state->internal_rate_mode = VA_RC_CQP;
7197 if (encoder_context->quality_level == 0)
7198 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
7202 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
7203 struct encode_state *encode_state,
7204 struct intel_encoder_context *encoder_context)
7207 struct i965_driver_data *i965 = i965_driver_data(ctx);
7208 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7209 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7210 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7211 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7212 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
7214 struct object_surface *obj_surface;
7215 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
7216 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
7218 struct gen9_surface_avc *avc_priv_surface;
7219 int i, j, enable_avc_ildb = 0;
7220 unsigned int allocate_flag = 1;
7222 unsigned int w_mb = generic_state->frame_width_in_mbs;
7223 unsigned int h_mb = generic_state->frame_height_in_mbs;
7224 struct avc_surface_param surface_param;
7226 /* update the parameter and check slice parameter */
7227 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
7228 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
7229 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7231 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7232 assert((slice_param->slice_type == SLICE_TYPE_I) ||
7233 (slice_param->slice_type == SLICE_TYPE_SI) ||
7234 (slice_param->slice_type == SLICE_TYPE_P) ||
7235 (slice_param->slice_type == SLICE_TYPE_SP) ||
7236 (slice_param->slice_type == SLICE_TYPE_B));
7238 if (slice_param->disable_deblocking_filter_idc != 1) {
7239 enable_avc_ildb = 1;
7246 avc_state->enable_avc_ildb = enable_avc_ildb;
7248 /* setup the all surface and buffer for PAK */
7249 /* Setup current reconstruct frame */
7250 obj_surface = encode_state->reconstructed_object;
7251 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7253 if (va_status != VA_STATUS_SUCCESS)
7256 memset(&surface_param, 0, sizeof(surface_param));
7257 surface_param.frame_width = generic_state->frame_width_in_pixel;
7258 surface_param.frame_height = generic_state->frame_height_in_pixel;
7259 va_status = gen9_avc_init_check_surfaces(ctx,
7260 obj_surface, encoder_context,
7262 if (va_status != VA_STATUS_SUCCESS)
7264 /* init the member of avc_priv_surface,frame_store_id,qp_value */
7266 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7267 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
7268 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
7269 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
7270 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
7271 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
7272 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
7273 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
7274 avc_priv_surface->frame_store_id = 0;
7275 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
7276 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
7277 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
7278 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
7279 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
7281 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7282 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7283 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7284 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7287 if (avc_state->enable_avc_ildb) {
7288 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7290 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7292 /* input YUV surface */
7293 obj_surface = encode_state->input_yuv_object;
7294 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7296 if (va_status != VA_STATUS_SUCCESS)
7298 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7299 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7301 /* Reference surfaces */
7302 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
7303 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7304 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
7305 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
7306 obj_surface = encode_state->reference_objects[i];
7307 avc_state->top_field_poc[2 * i] = 0;
7308 avc_state->top_field_poc[2 * i + 1] = 0;
7310 if (obj_surface && obj_surface->bo) {
7311 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
7313 /* actually it should be handled when it is reconstructed surface */
7314 va_status = gen9_avc_init_check_surfaces(ctx,
7315 obj_surface, encoder_context,
7317 if (va_status != VA_STATUS_SUCCESS)
7319 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7320 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
7321 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
7322 avc_priv_surface->frame_store_id = i;
7323 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
7324 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
7330 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7331 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7332 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7335 avc_ctx->pres_slice_batch_buffer_2nd_level =
7336 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
7338 encode_state->num_slice_params_ext);
7339 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
7340 return VA_STATUS_ERROR_ALLOCATION_FAILED;
7342 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
7343 avc_state->slice_batch_offset[i] = 0;
7348 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7349 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7350 &avc_ctx->res_intra_row_store_scratch_buffer,
7352 "PAK Intra row store scratch buffer");
7354 goto failed_allocation;
7356 size = w_mb * 4 * 64;
7357 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7358 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7359 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
7361 "PAK Deblocking filter row store scratch buffer");
7363 goto failed_allocation;
7365 size = w_mb * 2 * 64;
7366 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7367 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7368 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
7370 "PAK BSD/MPC row store scratch buffer");
7372 goto failed_allocation;
7374 size = w_mb * h_mb * 16;
7375 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7376 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7377 &avc_ctx->res_pak_mb_status_buffer,
7379 "PAK MB status buffer");
7381 goto failed_allocation;
7383 return VA_STATUS_SUCCESS;
7386 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * PAK (MFX bitstream packing) stage for one AVC frame.
 *
 * Prepares the PAK pipeline resources, then, once per BRC PAK pass,
 * programs the MFX picture- and slice-level commands and reads back the
 * MFC status registers.  The batch is submitted on a BSD (video) ring.
 *
 * NOTE(review): this excerpt omits some original source lines (return
 * type, the VAProfile parameter, braces and early-return paths); the
 * comments below describe only the statements that are visible here.
 */
7390 gen9_avc_encode_picture(VADriverContextP ctx,
7392 struct encode_state *encode_state,
7393 struct intel_encoder_context *encoder_context)
7396 struct i965_driver_data *i965 = i965_driver_data(ctx);
7397 struct i965_gpe_table *gpe = &i965->gpe_table;
7398 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7399 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7400 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7401 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* (Re)allocate/import every surface and buffer the PAK stage needs. */
7403 va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);
7405 if (va_status != VA_STATUS_SUCCESS)
/* Route the batch explicitly to BSD ring 0 when two BSD rings exist. */
7408 if (i965->intel.has_bsd2)
7409 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
7411 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
7412 intel_batchbuffer_emit_mi_flush(batch);
/* One PAK pass per BRC iteration, up to num_pak_passes. */
7414 for (generic_state->curr_pak_pass = 0;
7415 generic_state->curr_pak_pass < generic_state->num_pak_passes;
7416 generic_state->curr_pak_pass++) {
7418 if (generic_state->curr_pak_pass == 0) {
7419 /* Initialize the avc Image Ctrl reg for the first pass; write 0 to the status/control register. Is it needed in AVC? */
7420 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
7421 struct encoder_status_buffer_internal *status_buffer;
7423 status_buffer = &(avc_ctx->status_buffer);
7424 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
7425 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7426 mi_load_reg_imm.data = 0;
7427 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
7429 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
7430 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
/* Capture bitstream byte count / image status for this pass via MMIO reads. */
7431 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The second-level slice batch is per-frame; release it after submission. */
7435 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7436 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7437 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7440 intel_batchbuffer_end_atomic(batch);
7441 intel_batchbuffer_flush(batch);
/* Frame bookkeeping for the next encode call. */
7443 generic_state->seq_frame_number++;
7444 generic_state->total_frame_number++;
7445 generic_state->first_frame = 0;
7446 return VA_STATUS_SUCCESS;
/*
 * mfc_pipeline hook: dispatch the PAK stage by VA profile.  Every
 * supported H.264 profile funnels into gen9_avc_encode_picture();
 * anything else yields VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the switch header, default label and return are not
 * visible in this excerpt.
 */
7450 gen9_avc_pak_pipeline(VADriverContextP ctx,
7452 struct encode_state *encode_state,
7453 struct intel_encoder_context *encoder_context)
7458 case VAProfileH264ConstrainedBaseline:
7459 case VAProfileH264Main:
7460 case VAProfileH264High:
7461 case VAProfileH264MultiviewHigh:
7462 case VAProfileH264StereoHigh:
7463 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7467 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * mfc_context_destroy hook: release every GPE resource owned by the
 * shared VME/PAK context — frame surfaces, bitstream and row-store
 * scratch buffers, the reference surface list, DMV buffers, and the
 * second-level slice batch buffer.  The context structures themselves
 * are freed on the VME side, since VME and PAK share one context.
 */
7475 gen9_avc_pak_context_destroy(void * context)
7477 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7478 struct generic_encoder_context * generic_ctx;
7479 struct i965_avc_encoder_context * avc_ctx;
7485 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7486 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Input/output frame surfaces. */
7489 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7490 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7491 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7492 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
/* Output bitstream plus PAK row-store / MB-status scratch buffers. */
7494 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7495 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7496 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7497 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7498 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7500 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7501 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
/* Two DMV (direct-mode motion vector) buffers per reference (top/bottom). */
7504 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7505 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7508 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7509 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7510 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * get_status hook: publish the encoded frame size to the caller.
 * Reads the raw encoder_status stored in the coded buffer segment's
 * codec_private_data area (filled by gen9_avc_read_mfc_status) and
 * copies the frame bitstream byte count into the public segment size.
 * Returns VA_STATUS_ERROR_INVALID_BUFFER on NULL arguments.
 */
7516 gen9_avc_get_coded_status(VADriverContextP ctx,
7517 struct intel_encoder_context *encoder_context,
7518 struct i965_coded_buffer_segment *coded_buf_seg)
7520 struct encoder_status *avc_encode_status;
7522 if (!encoder_context || !coded_buf_seg)
7523 return VA_STATUS_ERROR_INVALID_BUFFER;
7525 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7526 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7528 return VA_STATUS_SUCCESS;
/*
 * Create and initialize the shared VME/PAK encoder context.
 *
 * Allocates the five context/state structures, wires them into
 * encoder_context->vme_context, selects the per-platform MBEnc/BRC
 * kernel binaries, seeds every generic and AVC-specific state field
 * with its default, defines the status-buffer offsets used for MFC
 * MMIO readback, loads the kernels and installs the VME hooks.
 *
 * NOTE(review): this excerpt omits some original lines (braces, the
 * full error-path cleanup and the return statements); comments describe
 * only the visible statements.
 */
7532 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7534 /* VME & PAK share the same context */
7535 struct i965_driver_data *i965 = i965_driver_data(ctx);
7536 struct encoder_vme_mfc_context * vme_context = NULL;
7537 struct generic_encoder_context * generic_ctx = NULL;
7538 struct i965_avc_encoder_context * avc_ctx = NULL;
7539 struct generic_enc_codec_state * generic_state = NULL;
7540 struct avc_enc_state * avc_state = NULL;
7541 struct encoder_status_buffer_internal *status_buffer;
7542 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
/* Allocate all context/state structs; any failure unwinds via the label below. */
7544 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7545 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7546 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7547 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7548 avc_state = calloc(1, sizeof(struct avc_enc_state));
7550 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7551 goto allocate_structure_failed;
/* calloc already zeroes; these memsets are redundant but harmless. */
7553 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7554 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7555 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7556 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7557 memset(avc_state, 0, sizeof(struct avc_enc_state));
/* Wire the pieces together under the encoder context. */
7559 encoder_context->vme_context = vme_context;
7560 vme_context->generic_enc_ctx = generic_ctx;
7561 vme_context->private_enc_ctx = avc_ctx;
7562 vme_context->generic_enc_state = generic_state;
7563 vme_context->private_enc_state = avc_state;
/* Pick the kernel binary blob for this GPU generation (SKL/BXT vs KBL/GLK). */
7565 if (IS_SKL(i965->intel.device_info) ||
7566 IS_BXT(i965->intel.device_info)) {
7567 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7568 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7569 } else if (IS_KBL(i965->intel.device_info) ||
7570 IS_GLK(i965->intel.device_info)) {
7571 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7572 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7574 goto allocate_structure_failed;
7576 /* initialize misc ? */
7578 generic_ctx->use_hw_scoreboard = 1;
7579 generic_ctx->use_hw_non_stalling_scoreboard = 1;
7581 /* initialize generic state */
7583 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7584 generic_state->preset = INTEL_PRESET_RT_SPEED;
7585 generic_state->seq_frame_number = 0;
7586 generic_state->total_frame_number = 0;
7587 generic_state->frame_type = 0;
7588 generic_state->first_frame = 1;
/* Frame/downscale dimensions are filled in later from the sequence params. */
7590 generic_state->frame_width_in_pixel = 0;
7591 generic_state->frame_height_in_pixel = 0;
7592 generic_state->frame_width_in_mbs = 0;
7593 generic_state->frame_height_in_mbs = 0;
7594 generic_state->frame_width_4x = 0;
7595 generic_state->frame_height_4x = 0;
7596 generic_state->frame_width_16x = 0;
7597 generic_state->frame_height_16x = 0;
7598 generic_state->frame_width_32x = 0;
7599 generic_state->downscaled_width_4x_in_mb = 0;
7600 generic_state->downscaled_height_4x_in_mb = 0;
7601 generic_state->downscaled_width_16x_in_mb = 0;
7602 generic_state->downscaled_height_16x_in_mb = 0;
7603 generic_state->downscaled_width_32x_in_mb = 0;
7604 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capabilities: 4x and 16x supported, 32x disabled on gen9. */
7606 generic_state->hme_supported = 1;
7607 generic_state->b16xme_supported = 1;
7608 generic_state->b32xme_supported = 0;
7609 generic_state->hme_enabled = 0;
7610 generic_state->b16xme_enabled = 0;
7611 generic_state->b32xme_enabled = 0;
7612 generic_state->brc_distortion_buffer_supported = 1;
7613 generic_state->brc_constant_buffer_supported = 0;
/* BRC defaults; brc_enabled is overridden below for non-CQP modes. */
7616 generic_state->frame_rate = 30;
7617 generic_state->brc_allocated = 0;
7618 generic_state->brc_inited = 0;
7619 generic_state->brc_need_reset = 0;
7620 generic_state->is_low_delay = 0;
7621 generic_state->brc_enabled = 0;//default
7622 generic_state->internal_rate_mode = 0;
7623 generic_state->curr_pak_pass = 0;
7624 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7625 generic_state->is_first_pass = 1;
7626 generic_state->is_last_pass = 0;
7627 generic_state->mb_brc_enabled = 0; // enable mb brc
7628 generic_state->brc_roi_enable = 0;
7629 generic_state->brc_dirty_roi_enable = 0;
7630 generic_state->skip_frame_enbale = 0;
/* Rate-control parameters; populated from the app's misc buffers later. */
7632 generic_state->target_bit_rate = 0;
7633 generic_state->max_bit_rate = 0;
7634 generic_state->min_bit_rate = 0;
7635 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7636 generic_state->vbv_buffer_size_in_bit = 0;
7637 generic_state->frames_per_100s = 0;
7638 generic_state->gop_size = 0;
7639 generic_state->gop_ref_distance = 0;
7640 generic_state->brc_target_size = 0;
7641 generic_state->brc_mode = 0;
7642 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7643 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7644 generic_state->brc_init_reset_buf_size_in_bits = 0;
7645 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7646 generic_state->frames_per_window_size = 0;//default
7647 generic_state->target_percentage = 0;
7649 generic_state->avbr_curracy = 0;
7650 generic_state->avbr_convergence = 0;
7652 generic_state->num_skip_frames = 0;
7653 generic_state->size_skip_frames = 0;
7655 generic_state->num_roi = 0;
7656 generic_state->max_delta_qp = 0;
7657 generic_state->min_delta_qp = 0;
/* Any rate-control mode other than NONE/CQP turns BRC on. */
7659 if (encoder_context->rate_control_mode != VA_RC_NONE &&
7660 encoder_context->rate_control_mode != VA_RC_CQP) {
7661 generic_state->brc_enabled = 1;
7662 generic_state->brc_distortion_buffer_supported = 1;
7663 generic_state->brc_constant_buffer_supported = 1;
7664 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7666 /*avc state initialization */
7667 avc_state->mad_enable = 0;
7668 avc_state->mb_disable_skip_map_enable = 0;
7669 avc_state->sfd_enable = 1;//default
7670 avc_state->sfd_mb_enable = 1;//set it true
7671 avc_state->adaptive_search_window_enable = 1;//default
7672 avc_state->mb_qp_data_enable = 0;
7673 avc_state->intra_refresh_i_enable = 0;
7674 avc_state->min_max_qp_enable = 0;
7675 avc_state->skip_bias_adjustment_enable = 0;//default, same as skip_bias_adjustment_supported? no
7678 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7679 avc_state->ftq_skip_threshold_lut_input_enable = 0;
7680 avc_state->ftq_override = 0;
7682 avc_state->direct_bias_adjustment_enable = 0;
7683 avc_state->global_motion_bias_adjustment_enable = 0;
7684 avc_state->disable_sub_mb_partion = 0;
7685 avc_state->arbitrary_num_mbs_in_slice = 0;
7686 avc_state->adaptive_transform_decision_enable = 0;//default
7687 avc_state->skip_check_disable = 0;
7688 avc_state->tq_enable = 0;
7689 avc_state->enable_avc_ildb = 0;
7690 avc_state->mbaff_flag = 0;
7691 avc_state->enable_force_skip = 1;//default
7692 avc_state->rc_panic_enable = 1;//default
7693 avc_state->suppress_recon_enable = 1;//default
7695 avc_state->ref_pic_select_list_supported = 1;
7696 avc_state->mb_brc_supported = 1;//?,default
7697 avc_state->multi_pre_enable = 1;//default
7698 avc_state->ftq_enable = 1;//default
7699 avc_state->caf_supported = 1; //default
7700 avc_state->caf_enable = 0;
7701 avc_state->caf_disable_hd = 1;//default
7702 avc_state->skip_bias_adjustment_supported = 1;//default
7704 avc_state->adaptive_intra_scaling_enable = 1;//default
7705 avc_state->old_mode_cost_enable = 0;//default
7706 avc_state->multi_ref_qp_enable = 1;//default
7707 avc_state->weighted_ref_l0_enable = 1;//default
7708 avc_state->weighted_ref_l1_enable = 1;//default
7709 avc_state->weighted_prediction_supported = 0;
7710 avc_state->brc_split_enable = 0;
7711 avc_state->slice_level_report_supported = 0;
7713 avc_state->fbr_bypass_enable = 1;//default
7714 avc_state->field_scaling_output_interleaved = 0;
7715 avc_state->mb_variance_output_enable = 0;
7716 avc_state->mb_pixel_average_output_enable = 0;
7717 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7718 avc_state->mbenc_curbe_set_in_brc_update = 0;
7719 avc_state->rounding_inter_enable = 1; //default
7720 avc_state->adaptive_rounding_inter_enable = 1;//default
7722 avc_state->mbenc_i_frame_dist_in_use = 0;
7723 avc_state->mb_status_supported = 1; //set in initialization for gen9
7724 avc_state->mb_status_enable = 0;
7725 avc_state->mb_vproc_stats_enable = 0;
7726 avc_state->flatness_check_enable = 0;
7727 avc_state->flatness_check_supported = 1;//default
7728 avc_state->block_based_skip_enable = 0;
7729 avc_state->use_widi_mbenc_kernel = 0;
7730 avc_state->kernel_trellis_enable = 0;
7731 avc_state->generic_reserved = 0;
/* Rounding values start "invalid" and are derived per frame type later. */
7733 avc_state->rounding_value = 0;
7734 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7735 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7736 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7737 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7738 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7739 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7740 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7741 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7742 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7744 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7745 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7746 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7748 avc_state->intra_refresh_qp_threshold = 0;
7749 avc_state->trellis_flag = 0;
7750 avc_state->hme_mv_cost_scaling_factor = 0;
7751 avc_state->slice_height = 1;
7752 avc_state->slice_num = 1;
7753 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7754 avc_state->bi_weight = 0;
7756 avc_state->lambda_table_enable = 0;
/* Per-platform tuning: BRC constant-data surface geometry and feature set. */
7759 if (IS_SKL(i965->intel.device_info) ||
7760 IS_BXT(i965->intel.device_info)) {
7761 avc_state->brc_const_data_surface_width = 64;
7762 avc_state->brc_const_data_surface_height = 44;
7763 avc_state->brc_split_enable = 1;
7764 } else if (IS_KBL(i965->intel.device_info) ||
7765 IS_GLK(i965->intel.device_info)) {
7766 avc_state->brc_const_data_surface_width = 64;
7767 avc_state->brc_const_data_surface_height = 53;
/* KBL/GLK add trellis/lambda-table kernels and a decoupled MBEnc CURBE. */
7769 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7770 avc_state->extended_mv_cost_range_enable = 0;
7771 avc_state->reserved_g95 = 0;
7772 avc_state->mbenc_brc_buffer_size = 128;
7773 avc_state->kernel_trellis_enable = 1;
7774 avc_state->lambda_table_enable = 1;
7775 avc_state->brc_split_enable = 1;
7778 avc_state->num_refs[0] = 0;
7779 avc_state->num_refs[1] = 0;
7780 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7781 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7782 avc_state->tq_rounding = 0;
7783 avc_state->zero_mv_threshold = 0;
7784 avc_state->slice_second_levle_batch_buffer_in_use = 0;
7788 /* the definition of status buffer offset for Encoder */
7790 status_buffer = &avc_ctx->status_buffer;
7791 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
/* Byte offsets of each status field inside the coded buffer segment. */
7793 status_buffer->base_offset = base_offset;
7794 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7795 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7796 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7797 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7798 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7799 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
/* MFX MMIO register offsets whose values are copied into those fields. */
7801 status_buffer->status_buffer_size = sizeof(struct encoder_status);
7802 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7803 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7804 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7805 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7806 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load kernels and install the VME pipeline hooks. */
7808 gen9_avc_kernel_init(ctx, encoder_context);
7809 encoder_context->vme_context = vme_context;
7810 encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7811 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* Error path: free whatever was allocated (remaining frees not visible here). */
7815 allocate_structure_failed:
7820 free(generic_state);
7826 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7828 /* VME & PAK share the same context */
7829 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7834 encoder_context->mfc_context = pak_context;
7835 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7836 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7837 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7838 encoder_context->get_status = gen9_avc_get_coded_status;