2 * Copyright @ 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
/* Encoder-wide tunables for GPE (media pipeline) context setup. */
54 #define MAX_URB_SIZE 4096 /* In register */
55 #define NUM_KERNELS_PER_GPE_CONTEXT 1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4 /* alignment shift: 1 << 4 = 16 bytes */
/* Emit a two-dword buffer address into the BCS batch: a 64-bit relocation
 * when a bo is supplied (write domain set when is_target), otherwise two
 * zero dwords.
 * NOTE(review): this listing is truncated -- the else branch and the
 * closing "} while (0)" are among the missing lines; verify against the
 * full source. */
59 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
61 OUT_BCS_RELOC64(batch, \
63 I915_GEM_DOMAIN_INSTRUCTION, \
64 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
67 OUT_BCS_BATCH(batch, 0); \
68 OUT_BCS_BATCH(batch, 0); \
/* Same as OUT_BUFFER_2DW plus a third dword carrying surface attributes
 * (e.g. memory-object control state).
 * NOTE(review): closing "} while (0)" is missing from this truncated
 * listing. */
72 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
73 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
74 OUT_BCS_BATCH(batch, attr); \
/* Flat (all-16) quantization matrix, packed four 8-bit entries per dword.
 * NOTE(review): closing "};" is missing from this truncated listing. */
77 static const uint32_t qm_flat[16] = {
78 0x10101010, 0x10101010, 0x10101010, 0x10101010,
79 0x10101010, 0x10101010, 0x10101010, 0x10101010,
80 0x10101010, 0x10101010, 0x10101010, 0x10101010,
81 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quantization matrix, packed two 16-bit entries (0x1000)
 * per dword.
 * NOTE(review): closing "};" is missing from this truncated listing. */
84 static const uint32_t fqm_flat[32] = {
85 0x10001000, 0x10001000, 0x10001000, 0x10001000,
86 0x10001000, 0x10001000, 0x10001000, 0x10001000,
87 0x10001000, 0x10001000, 0x10001000, 0x10001000,
88 0x10001000, 0x10001000, 0x10001000, 0x10001000,
89 0x10001000, 0x10001000, 0x10001000, 0x10001000,
90 0x10001000, 0x10001000, 0x10001000, 0x10001000,
91 0x10001000, 0x10001000, 0x10001000, 0x10001000,
92 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Maps slice type index to kernel selection index.
 * NOTE(review): the exact slice-type ordering (I/P/B) cannot be confirmed
 * from this truncated listing -- verify against SLICE_TYPE_* definitions. */
95 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
/* Default CURBE payload for the BRC init/reset kernel.
 * NOTE(review): the initializer body is missing from this truncated
 * listing. */
97 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
/* Default CURBE payload for the per-frame BRC update kernel.
 * NOTE(review): the initializer body is missing from this truncated
 * listing. */
254 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
/* Refresh the generic rate-control state from the misc parameters cached
 * in encoder_context->brc: bitrate (kbps), CBR/VBR targets, frame rate,
 * HRD buffer sizes, and up to 3 ROI rectangles (converted from pixels to
 * 16x16 MB units). Sets brc_need_reset when the effective target bitrate
 * changes.
 * NOTE(review): this listing is truncated -- intermediate lines (variable
 * declarations such as "int i", closing braces, else keywords) are
 * missing. Code below is kept byte-identical to the listing. */
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413                                 struct encode_state *encode_state,
414                                 struct intel_encoder_context *encoder_context)
416 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* bits_per_second -> kbps with round-up. */
421 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
423 generic_state->brc_need_reset = encoder_context->brc.need_reset;
425 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: min == max == target. */
426 generic_state->min_bit_rate = generic_state->max_bit_rate;
427 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
429 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430 generic_state->target_bit_rate = generic_state->max_bit_rate;
431 generic_state->brc_need_reset = 1;
433 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
/* VBR: min derived from target_percentage; target = max * pct / 100. */
434 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
437 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439 generic_state->brc_need_reset = 1;
/* Frame-rate bookkeeping; CQP gets a fixed 30fps fallback (else arm is
 * among the truncated lines). */
444 if (generic_state->internal_rate_mode != VA_RC_CQP) {
445 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
449 generic_state->frames_per_100s = 30 * 100;
450 generic_state->frame_rate = 30 ;
451 generic_state->frames_per_window_size = 30;
/* HRD parameters only matter for rate-controlled modes. */
455 if (generic_state->internal_rate_mode != VA_RC_CQP) {
456 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
/* ROI: clamp to 3 regions, copy rectangles, then convert px -> MB. */
461 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462 if (generic_state->num_roi > 0) {
463 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
466 for (i = 0; i < generic_state->num_roi; i++) {
467 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
468 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
469 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
470 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
473 generic_state->roi[i].left /= 16;
474 generic_state->roi[i].right /= 16;
475 generic_state->roi[i].top /= 16;
476 generic_state->roi[i].bottom /= 16;
/* Locate one kernel inside the monolithic AVC encoder kernel binary.
 * The binary starts with a gen9_avc_encoder_kernel_header table whose
 * entries hold 64-byte-granular start offsets (kernel_start_pointer << 6).
 * The kernel size is the distance to the next table entry's start, or to
 * the end of the binary for the last kernel.
 * NOTE(review): truncated listing -- parameter lines (binary_size,
 * krnstate_idx), local declarations, return statements and several
 * closing braces are missing. */
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
485                                      INTEL_GENERIC_ENC_OPERATION operation,
487                                      struct i965_kernel *ret_kernel)
489 typedef uint32_t BIN_PTR[4];
492 gen9_avc_encoder_kernel_header *pkh_table;
493 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
496 if (!pvbinary || !ret_kernel)
499 bin_start = (char *)pvbinary;
500 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
/* One past the last table entry: sentinel for "no next kernel". */
501 pinvalid_entry = &(pkh_table->static_detection) + 1;
502 next_krnoffset = binary_size;
/* Select the first header of the requested operation's kernel group;
 * krnstate_idx then indexes within the group. */
504 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505 pcurr_header = &pkh_table->ply_dscale_ply;
506 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507 pcurr_header = &pkh_table->ply_2xdscale_ply;
508 } else if (operation == INTEL_GENERIC_ENC_ME) {
509 pcurr_header = &pkh_table->me_p;
510 } else if (operation == INTEL_GENERIC_ENC_BRC) {
511 pcurr_header = &pkh_table->frame_brc_init;
512 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513 pcurr_header = &pkh_table->mbenc_quality_I;
514 } else if (operation == INTEL_GENERIC_ENC_WP) {
515 pcurr_header = &pkh_table->wp;
516 } else if (operation == INTEL_GENERIC_ENC_SFD) {
517 pcurr_header = &pkh_table->static_detection;
522 pcurr_header += krnstate_idx;
/* Offsets are stored in 64-byte units, hence << 6. */
523 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
525 pnext_header = (pcurr_header + 1);
526 if (pnext_header < pinvalid_entry) {
527 next_krnoffset = pnext_header->kernel_start_pointer << 6;
529 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* Destructor for the per-surface AVC private data
 * (obj_surface->free_private_data callback): destroys the 4x/16x/32x
 * downscaled surfaces, frees the per-surface GPE resources and drops the
 * direct-MV bo references.
 * NOTE(review): truncated listing -- the *data unpacking/guard, free of
 * the avc_surface struct itself, and closing braces are missing. */
534 gen9_free_surfaces_avc(void **data)
536 struct gen9_surface_avc *avc_surface;
543 if (avc_surface->scaled_4x_surface_obj) {
544 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546 avc_surface->scaled_4x_surface_obj = NULL;
549 if (avc_surface->scaled_16x_surface_obj) {
550 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552 avc_surface->scaled_16x_surface_obj = NULL;
555 if (avc_surface->scaled_32x_surface_obj) {
556 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558 avc_surface->scaled_32x_surface_obj = NULL;
561 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
/* dri_bo_unreference(NULL) is tolerated; NULL-ing guards double-free. */
565 dri_bo_unreference(avc_surface->dmv_top);
566 avc_surface->dmv_top = NULL;
567 dri_bo_unreference(avc_surface->dmv_bottom);
568 avc_surface->dmv_bottom = NULL;
578 gen9_avc_init_check_surfaces(VADriverContextP ctx,
579 struct object_surface *obj_surface,
580 struct intel_encoder_context *encoder_context,
581 struct avc_surface_param *surface_param)
583 struct i965_driver_data *i965 = i965_driver_data(ctx);
584 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
585 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
586 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
588 struct gen9_surface_avc *avc_surface;
589 int downscaled_width_4x, downscaled_height_4x;
590 int downscaled_width_16x, downscaled_height_16x;
591 int downscaled_width_32x, downscaled_height_32x;
593 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
594 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
595 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
596 int allocate_flag = 1;
599 if (!obj_surface || !obj_surface->bo)
600 return VA_STATUS_ERROR_INVALID_SURFACE;
602 if (obj_surface->private_data) {
603 return VA_STATUS_SUCCESS;
606 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
609 return VA_STATUS_ERROR_ALLOCATION_FAILED;
611 avc_surface->ctx = ctx;
612 obj_surface->private_data = avc_surface;
613 obj_surface->free_private_data = gen9_free_surfaces_avc;
615 downscaled_width_4x = generic_state->frame_width_4x;
616 downscaled_height_4x = generic_state->frame_height_4x;
618 i965_CreateSurfaces(ctx,
620 downscaled_height_4x,
623 &avc_surface->scaled_4x_surface_id);
625 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
627 if (!avc_surface->scaled_4x_surface_obj) {
628 return VA_STATUS_ERROR_ALLOCATION_FAILED;
631 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
632 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
634 downscaled_width_16x = generic_state->frame_width_16x;
635 downscaled_height_16x = generic_state->frame_height_16x;
636 i965_CreateSurfaces(ctx,
637 downscaled_width_16x,
638 downscaled_height_16x,
641 &avc_surface->scaled_16x_surface_id);
642 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
644 if (!avc_surface->scaled_16x_surface_obj) {
645 return VA_STATUS_ERROR_ALLOCATION_FAILED;
648 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
649 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
651 if (generic_state->b32xme_supported ||
652 generic_state->b32xme_enabled) {
653 downscaled_width_32x = generic_state->frame_width_32x;
654 downscaled_height_32x = generic_state->frame_height_32x;
655 i965_CreateSurfaces(ctx,
656 downscaled_width_32x,
657 downscaled_height_32x,
660 &avc_surface->scaled_32x_surface_id);
661 avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
663 if (!avc_surface->scaled_32x_surface_obj) {
664 return VA_STATUS_ERROR_ALLOCATION_FAILED;
667 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
668 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
671 /*mb code and mv data for each frame*/
672 size = frame_mb_nums * 16 * 4;
673 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
674 &avc_surface->res_mb_code_surface,
678 goto failed_allocation;
680 size = frame_mb_nums * 32 * 4;
681 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
682 &avc_surface->res_mv_data_surface,
686 goto failed_allocation;
689 if (avc_state->ref_pic_select_list_supported) {
690 width = ALIGN(frame_width_in_mbs * 8, 64);
691 height = frame_height_in_mbs ;
692 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
693 &avc_surface->res_ref_pic_select_surface,
696 "Ref pic select list buffer");
698 goto failed_allocation;
702 avc_surface->dmv_top =
703 dri_bo_alloc(i965->intel.bufmgr,
704 "direct mv top Buffer",
707 avc_surface->dmv_bottom =
708 dri_bo_alloc(i965->intel.bufmgr,
709 "direct mv bottom Buffer",
712 assert(avc_surface->dmv_top);
713 assert(avc_surface->dmv_bottom);
715 return VA_STATUS_SUCCESS;
718 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Allocate every context-level GPE surface/buffer used by the AVC VME
 * pipeline: 2nd-level batch for image-state writes, MB-status and
 * flatness-check surfaces, the 4x/16x/32x HME MV/distortion surfaces,
 * the one-time BRC buffer set, external MB-QP, slice-map, SFD, weighted-
 * prediction and MAD buffers. Existing resources are freed before being
 * re-allocated, so the function is safe to call on re-configuration.
 * Returns VA_STATUS_SUCCESS or VA_STATUS_ERROR_ALLOCATION_FAILED.
 * NOTE(review): truncated listing -- the "if (!allocate_flag)" guards in
 * front of each "goto failed_allocation", many size/flag argument lines,
 * the failed_allocation label and closing braces are missing. Code below
 * is kept byte-identical to the listing. */
722 gen9_avc_allocate_resources(VADriverContextP ctx,
723                             struct encode_state *encode_state,
724                             struct intel_encoder_context *encoder_context)
726 struct i965_driver_data *i965 = i965_driver_data(ctx);
727 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
728 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
729 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
730 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
731 unsigned int size = 0;
732 unsigned int width = 0;
733 unsigned int height = 0;
734 unsigned char * data = NULL;
735 int allocate_flag = 1;
738 /*all the surface/buffer are allocated here*/
740 /*second level batch buffer for image state write when cqp etc*/
741 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
742 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
743 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
744                                            &avc_ctx->res_image_state_batch_buffer_2nd_level,
746                                            "second levle batch (image state write) buffer");
748 goto failed_allocation;
750 /* scaling related surface */
751 if (avc_state->mb_status_supported) {
752 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
/* 16 dwords per MB, rounded up to a 1024-byte multiple. */
753 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023)&~0x3ff;
754 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
755                                            &avc_ctx->res_mb_status_buffer,
757                                            "MB statistics output buffer");
759 goto failed_allocation;
760 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
763 if (avc_state->flatness_check_supported) {
764 width = generic_state->frame_width_in_mbs * 4;
765 height = generic_state->frame_height_in_mbs * 4;
766 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
767 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
768                                               &avc_ctx->res_flatness_check_surface,
771                                               "Flatness check buffer");
773 goto failed_allocation;
775 /* me related surface */
776 width = generic_state->downscaled_width_4x_in_mb * 8;
777 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
778 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
779 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
780                                               &avc_ctx->s4x_memv_distortion_buffer,
783                                               "4x MEMV distortion buffer");
785 goto failed_allocation;
786 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
788 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
789 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
790 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
791 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
792                                               &avc_ctx->s4x_memv_min_distortion_brc_buffer,
795                                               "4x MEMV min distortion brc buffer");
797 goto failed_allocation;
798 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
801 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
802 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
803 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
804 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
805                                               &avc_ctx->s4x_memv_data_buffer,
808                                               "4x MEMV data buffer");
810 goto failed_allocation;
811 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
814 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
815 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
816 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
817 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
818                                               &avc_ctx->s16x_memv_data_buffer,
821                                               "16x MEMV data buffer");
823 goto failed_allocation;
824 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
827 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
828 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
829 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
830 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
831                                               &avc_ctx->s32x_memv_data_buffer,
834                                               "32x MEMV data buffer");
836 goto failed_allocation;
837 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
/* BRC buffers survive re-configuration; allocate only once. */
840 if (!generic_state->brc_allocated) {
841 /*brc related surface */
842 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
844 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
845                                            &avc_ctx->res_brc_history_buffer,
847                                            "brc history buffer");
849 goto failed_allocation;
851 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
853 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
854                                            &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
856                                            "brc pak statistic buffer");
858 goto failed_allocation;
860 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
861 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
862 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
863                                            &avc_ctx->res_brc_image_state_read_buffer,
865                                            "brc image state read buffer");
867 goto failed_allocation;
869 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
870 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
871 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
872                                            &avc_ctx->res_brc_image_state_write_buffer,
874                                            "brc image state write buffer");
876 goto failed_allocation;
878 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
879 height = avc_state->brc_const_data_surface_height;
880 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
881 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
882                                               &avc_ctx->res_brc_const_data_buffer,
885                                               "brc const data buffer");
887 goto failed_allocation;
889 if (generic_state->brc_distortion_buffer_supported) {
/* NOTE(review): the first width/height pair is immediately overwritten
 * by the second -- dead stores kept as in the original. */
890 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
891 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
892 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
893 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
894 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
895 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
896                                               &avc_ctx->res_brc_dist_data_surface,
899                                               "brc dist data buffer");
901 goto failed_allocation;
902 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
905 if (generic_state->brc_roi_enable) {
906 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
907 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
908 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
909 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
910                                               &avc_ctx->res_mbbrc_roi_surface,
915 goto failed_allocation;
916 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
920 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
921 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
922 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
923 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
924                                               &avc_ctx->res_mbbrc_mb_qp_data_surface,
927                                               "mbbrc mb qp buffer");
929 goto failed_allocation;
931 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
932 size = 16 * AVC_QP_MAX * 4;
933 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
934                                            &avc_ctx->res_mbbrc_const_data_buffer,
936                                            "mbbrc const data buffer");
938 goto failed_allocation;
940 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
941 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
942 size = avc_state->mbenc_brc_buffer_size;
943 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
944                                            &avc_ctx->res_mbenc_brc_buffer,
948 goto failed_allocation;
949 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
951 generic_state->brc_allocated = 1;
/* Application-supplied per-MB QP map. */
955 if (avc_state->mb_qp_data_enable) {
956 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
957 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
958 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
959 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
960                                               &avc_ctx->res_mb_qp_data_surface,
963                                               "external mb qp buffer");
965 goto failed_allocation;
968 /* mbenc related surface. it share most of surface with other kernels */
969 if (avc_state->arbitrary_num_mbs_in_slice) {
970 width = (generic_state->frame_width_in_mbs + 1) * 64;
971 height = generic_state->frame_height_in_mbs ;
972 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
973 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
974                                               &avc_ctx->res_mbenc_slice_map_surface,
979 goto failed_allocation;
981 /*generate slice map,default one slice per frame.*/
984 /* sfd related surface */
985 if (avc_state->sfd_enable) {
986 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
988 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
989                                            &avc_ctx->res_sfd_output_buffer,
991                                            "sfd output buffer");
993 goto failed_allocation;
/* Static cost tables: 52 bytes (one per QP), copied in after mapping. */
995 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
996 size = ALIGN(52, 64);
997 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
998                                            &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1000                                            "sfd P frame cost table buffer");
1002 goto failed_allocation;
1003 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1005 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1006 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1008 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1009 size = ALIGN(52, 64);
1010 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1011                                            &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1013                                            "sfd B frame cost table buffer");
1015 goto failed_allocation;
1016 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1018 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1019 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1022 /* wp related surfaces */
1023 if (avc_state->weighted_prediction_supported) {
1024 for (i = 0; i < 2 ; i++) {
1025 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1029 width = generic_state->frame_width_in_pixel;
1030 height = generic_state->frame_height_in_pixel ;
1031 i965_CreateSurfaces(ctx,
1034                     VA_RT_FORMAT_YUV420,
1036                     &avc_ctx->wp_output_pic_select_surface_id[i]);
1037 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1039 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1040 goto failed_allocation;
1043 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1044                             VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* Wrap the WP output surfaces as 2D GPE resources for kernel binding. */
1046 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1047 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1048 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1049 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1054 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1056 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1057                                            &avc_ctx->res_mad_data_buffer,
1058                                            ALIGN(size, 0x1000),
1061 goto failed_allocation;
1063 return VA_STATUS_SUCCESS;
1066 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release every context-level resource created by
 * gen9_avc_allocate_resources, then destroy the weighted-prediction
 * output surfaces. i965_free_gpe_resource() is safe to call on resources
 * that were never allocated.
 * NOTE(review): truncated listing -- the "int i" declaration and closing
 * braces are among the missing lines. */
1070 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1075 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1076 VADriverContextP ctx = avc_ctx->ctx;
1079 /* free all the surface/buffer here*/
1080 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1081 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1082 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1083 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1084 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1085 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1086 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1087 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1088 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1089 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1090 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1091 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1092 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1093 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1094 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1095 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1096 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1097 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1098 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1099 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1100 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1101 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1102 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1103 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1104 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1105 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
/* The WP surfaces are VA surfaces, not plain GPE resources. */
1107 for (i = 0; i < 2 ; i++) {
1108 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1109 i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1110 avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1111 avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
/* Submit one GPE kernel via a single MEDIA_OBJECT command: tags the
 * status buffer with the media_function id (for status reporting), then
 * emits pipeline setup, the media object, a media-state flush and
 * pipeline end inside one atomic batch, and flushes the batch.
 * NOTE(review): truncated listing -- the media_function parameter line
 * and closing brace are missing. */
1118 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1119                                  struct intel_encoder_context *encoder_context,
1120                                  struct i965_gpe_context *gpe_context,
1122                                  struct gpe_media_object_parameter *param)
1124 struct i965_driver_data *i965 = i965_driver_data(ctx);
1125 struct i965_gpe_table *gpe = &i965->gpe_table;
1126 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1127 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1129 struct intel_batchbuffer *batch = encoder_context->base.batch;
1130 struct encoder_status_buffer_internal *status_buffer;
1131 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1136 intel_batchbuffer_start_atomic(batch, 0x1000);
1137 intel_batchbuffer_emit_mi_flush(batch);
/* Record which media function is in flight for async status queries. */
1139 status_buffer = &(avc_ctx->status_buffer);
1140 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1141 mi_store_data_imm.bo = status_buffer->bo;
1142 mi_store_data_imm.offset = status_buffer->media_index_offset;
1143 mi_store_data_imm.dw0 = media_function;
1144 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1146 gpe->pipeline_setup(ctx, gpe_context, batch);
1147 gpe->media_object(ctx, gpe_context, batch, param);
1148 gpe->media_state_flush(ctx, gpe_context, batch);
1150 gpe->pipeline_end(ctx, gpe_context, batch);
1152 intel_batchbuffer_end_atomic(batch);
1154 intel_batchbuffer_flush(batch);
/* Same as gen9_avc_run_kernel_media_object but dispatches via the media
 * object walker (hardware thread-space walker) instead of a single
 * MEDIA_OBJECT.
 * NOTE(review): truncated listing -- the media_function parameter line
 * and closing brace are missing. */
1158 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1159                                         struct intel_encoder_context *encoder_context,
1160                                         struct i965_gpe_context *gpe_context,
1162                                         struct gpe_media_object_walker_parameter *param)
1164 struct i965_driver_data *i965 = i965_driver_data(ctx);
1165 struct i965_gpe_table *gpe = &i965->gpe_table;
1166 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1167 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1169 struct intel_batchbuffer *batch = encoder_context->base.batch;
1170 struct encoder_status_buffer_internal *status_buffer;
1171 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1176 intel_batchbuffer_start_atomic(batch, 0x1000);
1178 intel_batchbuffer_emit_mi_flush(batch);
/* Record which media function is in flight for async status queries. */
1180 status_buffer = &(avc_ctx->status_buffer);
1181 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1182 mi_store_data_imm.bo = status_buffer->bo;
1183 mi_store_data_imm.offset = status_buffer->media_index_offset;
1184 mi_store_data_imm.dw0 = media_function;
1185 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1187 gpe->pipeline_setup(ctx, gpe_context, batch);
1188 gpe->media_object_walker(ctx, gpe_context, batch, param);
1189 gpe->media_state_flush(ctx, gpe_context, batch);
1191 gpe->pipeline_end(ctx, gpe_context, batch);
1193 intel_batchbuffer_end_atomic(batch);
1195 intel_batchbuffer_flush(batch);
/* Size a GPE context for one AVC kernel: CURBE length, optional sampler,
 * interface-descriptor table, binding table / surface-state layout, and
 * VFE state (thread count from EU total, URB partitioning derived from
 * CURBE + IDRT consumption).
 * NOTE(review): truncated listing -- the "else" before line 1226 and
 * closing brace are among the missing lines. */
1199 gen9_init_gpe_context_avc(VADriverContextP ctx,
1200                           struct i965_gpe_context *gpe_context,
1201                           struct encoder_kernel_parameter *kernel_param)
1203 struct i965_driver_data *i965 = i965_driver_data(ctx);
1205 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1207 gpe_context->sampler.entry_size = 0;
1208 gpe_context->sampler.max_entries = 0;
1210 if (kernel_param->sampler_size) {
1211 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1212 gpe_context->sampler.max_entries = 1;
1215 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1216 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1218 gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1219 gpe_context->surface_state_binding_table.binding_table_offset = 0;
1220 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1221 gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
/* 6 threads per EU when the EU count is known; 112 is the fallback
 * (16 EU * 7 threads). */
1223 if (i965->intel.eu_total > 0)
1224 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1226 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1228 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1229 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
/* Remaining URB registers after CURBE and IDRT, divided per entry. */
1230 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1231                                           gpe_context->vfe_state.curbe_allocation_size -
1232                                           ((gpe_context->idrt.entry_size >> 5) *
1233                                            gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1234 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1235 gpe_context->vfe_state.gpgpu_mode = 0;
/* Program the VFE hardware scoreboard (thread-dependency) descriptors.
 * The delta_x/delta_y pairs are 4-bit signed offsets of the neighbouring
 * macroblocks a thread must wait on (e.g. 0xF = -1, 0xE = -2).
 * NOTE(review): extraction dropped the else/brace lines; the duplicated
 * delta assignments below belong to different walk-pattern branches. */
1239 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1240 struct encoder_scoreboard_parameter *scoreboard_param)
1242 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1243 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1244 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
/* Walking-pattern mode overrides mask/type and uses its own delta set. */
1246 if (scoreboard_param->walkpat_flag) {
1247 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1248 gpe_context->vfe_desc5.scoreboard0.type = 1;
1250 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1251 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1253 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1254 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1256 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1257 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1259 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1260 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
/* Default (non-walkpat) dependency deltas: left, above, above-right, above-left. */
1263 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1264 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1267 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1268 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1271 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1272 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1275 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1276 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
/* Extended dependencies (scoreboard2). */
1279 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1280 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1283 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1284 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1287 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1288 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
/* delta_x6/y6 assigned again below — presumably an alternate branch; confirm
 * against the unextracted file. */
1291 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1292 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1296 VME pipeline related function
1300 scaling kernel related function
/* Fill the CURBE for the Gen9 4x downscaling kernel: input/output frame
 * dimensions, binding-table indices, and the optional MB flatness/variance/
 * average statistics outputs. param must point to a struct scaling_param. */
1303 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1304 struct encode_state *encode_state,
1305 struct i965_gpe_context *gpe_context,
1306 struct intel_encoder_context *encoder_context,
1309 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1310 struct scaling_param *surface_param = (struct scaling_param *)param;
1312 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1317 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1319 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1320 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
/* Binding-table indices for the source and destination luma surfaces. */
1322 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1323 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
/* Fixed flatness-check threshold used by the kernel. */
1326 curbe_cmd->dw5.flatness_threshold = 128;
1327 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1328 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1329 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
/* The MBV statistics surface is only bound when any stat output is on. */
1331 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1332 curbe_cmd->dw7.enable_mb_variance_output ||
1333 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1334 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1337 i965_gpe_context_unmap_curbe(gpe_context);
/* Gen9.5 (KBL/GLK) variant of the 4x scaling CURBE setup. Differs from the
 * Gen9 version in CURBE layout: all enable flags live in dw6, the stat BTI
 * moves to dw8, and 8x8-block statistics output is supported. */
1342 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1343 struct encode_state *encode_state,
1344 struct i965_gpe_context *gpe_context,
1345 struct intel_encoder_context *encoder_context,
1348 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1349 struct scaling_param *surface_param = (struct scaling_param *)param;
1351 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1356 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1358 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1359 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1361 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1362 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
/* Unlike gen9, the threshold is only programmed when flatness check is on. */
1364 if (surface_param->enable_mb_flatness_check)
1365 curbe_cmd->dw5.flatness_threshold = 128;
1366 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1367 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1368 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1369 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
/* Bind the MBV statistics surface only when some stat output is enabled. */
1371 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1372 curbe_cmd->dw6.enable_mb_variance_output ||
1373 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1374 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1377 i965_gpe_context_unmap_curbe(gpe_context);
/* Fill the CURBE for the 2x downscaling kernel (used for 32x HME, which
 * scales the 16x surface by a further 2x). Only the frame dimensions and
 * the source/destination binding-table indices are needed. */
1382 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1383 struct encode_state *encode_state,
1384 struct i965_gpe_context *gpe_context,
1385 struct intel_encoder_context *encoder_context,
1388 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1389 struct scaling_param *surface_param = (struct scaling_param *)param;
1391 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1396 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1398 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1399 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1401 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1402 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1404 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces used by the scaling kernel: the input and output luma
 * planes (as 2D surfaces), plus either the MBV-statistics buffer or the
 * flatness-check surface when statistics are requested. */
1409 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1410 struct encode_state *encode_state,
1411 struct i965_gpe_context *gpe_context,
1412 struct intel_encoder_context *encoder_context,
1415 struct scaling_param *surface_param = (struct scaling_param *)param;
1416 unsigned int surface_format;
1417 unsigned int res_size;
/* Output surface format selection: 32/16-bit UNORM pack 4/2 luma bytes per
 * element; plain R8_UNORM otherwise. */
1419 if (surface_param->scaling_out_use_32unorm_surf_fmt)
1420 surface_format = I965_SURFACEFORMAT_R32_UNORM;
1421 else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1422 surface_format = I965_SURFACEFORMAT_R16_UNORM;
1424 surface_format = I965_SURFACEFORMAT_R8_UNORM;
1426 gen9_add_2d_gpe_surface(ctx, gpe_context,
1427 surface_param->input_surface,
1428 0, 1, surface_format,
1429 GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1431 gen9_add_2d_gpe_surface(ctx, gpe_context,
1432 surface_param->output_surface,
1433 0, 1, surface_format,
1434 GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
/* Bind the mv_proc_stat buffer; FIXME(from original): may need rework. */
1437 if (surface_param->mbv_proc_stat_enabled) {
/* 16 dwords of statistics per 16x16 macroblock. */
1438 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1440 gen9_add_buffer_gpe_surface(ctx,
1442 surface_param->pres_mbv_proc_stat_buffer,
1446 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1447 } else if (surface_param->enable_mb_flatness_check) {
/* Flatness check alone uses a 2D R8 surface at the same binding slot. */
1448 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1449 surface_param->pres_flatness_check_surface,
1451 I965_SURFACEFORMAT_R8_UNORM,
1452 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
/* Run one downscaling pass (4x, 16x, or 32x HME level): select the kernel
 * and source/destination surfaces for that level, program the CURBE and
 * surface bindings, then dispatch a media-object-walker with no scoreboard
 * dependencies. The 16x pass reuses the 4x kernel (scaling the 4x surface
 * again by 4); the 32x pass uses the 2x kernel on the 16x surface.
 * NOTE(review): the switch statement and several case/brace lines were lost
 * in extraction; the visible bodies are the per-level case arms. */
1459 gen9_avc_kernel_scaling(VADriverContextP ctx,
1460 struct encode_state *encode_state,
1461 struct intel_encoder_context *encoder_context,
1464 struct i965_driver_data *i965 = i965_driver_data(ctx);
1465 struct i965_gpe_table *gpe = &i965->gpe_table;
1466 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1467 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1468 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1469 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1470 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1472 struct i965_gpe_context *gpe_context;
1473 struct scaling_param surface_param;
1474 struct object_surface *obj_surface;
1475 struct gen9_surface_avc *avc_priv_surface;
1476 struct gpe_media_object_walker_parameter media_object_walker_param;
1477 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1478 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1479 int media_function = 0;
1482 obj_surface = encode_state->reconstructed_object;
1483 avc_priv_surface = obj_surface->private_data;
1485 memset(&surface_param, 0, sizeof(struct scaling_param));
/* 4x: full-resolution input -> 4x surface; MB statistics may be produced. */
1487 case INTEL_ENC_HME_4x : {
1488 media_function = INTEL_MEDIA_STATE_4X_SCALING;
1489 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1490 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1491 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1493 surface_param.input_surface = encode_state->input_yuv_object ;
1494 surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1495 surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1497 surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1498 surface_param.output_frame_width = generic_state->frame_width_4x ;
1499 surface_param.output_frame_height = generic_state->frame_height_4x ;
1501 surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1502 surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1503 surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1505 surface_param.blk8x8_stat_enabled = 0 ;
1506 surface_param.use_4x_scaling = 1 ;
1507 surface_param.use_16x_scaling = 0 ;
1508 surface_param.use_32x_scaling = 0 ;
/* 16x: 4x surface -> 16x surface, using the same 4x kernel; no statistics. */
1511 case INTEL_ENC_HME_16x : {
1512 media_function = INTEL_MEDIA_STATE_16X_SCALING;
1513 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1514 downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1515 downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1517 surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1518 surface_param.input_frame_width = generic_state->frame_width_4x ;
1519 surface_param.input_frame_height = generic_state->frame_height_4x ;
1521 surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1522 surface_param.output_frame_width = generic_state->frame_width_16x ;
1523 surface_param.output_frame_height = generic_state->frame_height_16x ;
1525 surface_param.enable_mb_flatness_check = 0 ;
1526 surface_param.enable_mb_variance_output = 0 ;
1527 surface_param.enable_mb_pixel_average_output = 0 ;
1529 surface_param.blk8x8_stat_enabled = 0 ;
1530 surface_param.use_4x_scaling = 0 ;
1531 surface_param.use_16x_scaling = 1 ;
1532 surface_param.use_32x_scaling = 0 ;
/* 32x: 16x surface -> 32x surface via the 2x kernel; no statistics. */
1536 case INTEL_ENC_HME_32x : {
1537 media_function = INTEL_MEDIA_STATE_32X_SCALING;
1538 kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1539 downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1540 downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1542 surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1543 surface_param.input_frame_width = generic_state->frame_width_16x ;
1544 surface_param.input_frame_height = generic_state->frame_height_16x ;
1546 surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1547 surface_param.output_frame_width = generic_state->frame_width_32x ;
1548 surface_param.output_frame_height = generic_state->frame_height_32x ;
1550 surface_param.enable_mb_flatness_check = 0 ;
1551 surface_param.enable_mb_variance_output = 0 ;
1552 surface_param.enable_mb_pixel_average_output = 0 ;
1554 surface_param.blk8x8_stat_enabled = 0 ;
1555 surface_param.use_4x_scaling = 0 ;
1556 surface_param.use_16x_scaling = 0 ;
1557 surface_param.use_32x_scaling = 1 ;
1565 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1567 gpe->context_init(ctx, gpe_context);
1568 gpe->reset_binding_table(ctx, gpe_context);
/* The 2x kernel (32x HME) has its own CURBE layout. */
1570 if (surface_param.use_32x_scaling) {
1571 generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1573 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* 2x kernel writes R16_UNORM output; 4x kernel writes R32_UNORM. */
1576 if (surface_param.use_32x_scaling) {
1577 surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1578 surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1580 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1581 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
/* Statistics outputs only exist on the 4x pass: prefer the MB-status
 * buffer when supported, otherwise fall back to the flatness surface. */
1584 if (surface_param.use_4x_scaling) {
1585 if (avc_state->mb_status_supported) {
1586 surface_param.enable_mb_flatness_check = 0;
1587 surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1588 surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1591 surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1592 surface_param.mbv_proc_stat_enabled = 0 ;
1593 surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1597 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1599 /* setup the interface data */
1600 gpe->setup_interface_data(ctx, gpe_context);
1602 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1603 if (surface_param.use_32x_scaling) {
1604 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1605 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1607 /* the scaling is based on 8x8 blk level */
1608 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1609 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
/* Scaling threads are independent: no scoreboard dependency needed. */
1611 kernel_walker_param.no_dependency = 1;
1613 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1615 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1618 &media_object_walker_param);
1620 return VA_STATUS_SUCCESS;
1624 frame/mb brc related function
/* Build a baseline MFX_AVC_IMG_STATE command for the current frame from the
 * VA sequence/picture parameters. Per-PAK-pass fields (macroblock_stat_enable,
 * non_first_pass_flag) are patched later by gen9_avc_set_image_state(). */
1627 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1628 struct encode_state *encode_state,
1629 struct intel_encoder_context *encoder_context,
1630 struct gen9_mfx_avc_img_state *pstate)
1632 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1633 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1634 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1636 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1637 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1639 memset(pstate, 0, sizeof(*pstate));
/* DW0: command header; dword_length excludes the first two dwords. */
1641 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1642 pstate->dw0.sub_opcode_b = 0;
1643 pstate->dw0.sub_opcode_a = 0;
1644 pstate->dw0.command_opcode = 1;
1645 pstate->dw0.pipeline = 2;
1646 pstate->dw0.command_type = 3;
1648 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1650 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1651 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
1653 pstate->dw3.image_structure = 0;//frame is zero
1654 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1655 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1656 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1657 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1658 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
/* DW4: picture coding controls taken from the SPS/PPS bit fields. */
1660 pstate->dw4.field_picture_flag = 0;
1661 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1662 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1663 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1664 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1665 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1666 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1667 pstate->dw4.mb_mv_format_flag = 1;
1668 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1669 pstate->dw4.mv_unpacked_flag = 1;
1670 pstate->dw4.insert_test_flag = 0;
1671 pstate->dw4.load_slice_pointer_flag = 0;
1672 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1673 pstate->dw4.minimum_frame_size = 0;
/* DW5: multi-pass BRC trigger conditions and trellis quantization (AQ). */
1674 pstate->dw5.intra_mb_max_bit_flag = 1;
1675 pstate->dw5.inter_mb_max_bit_flag = 1;
1676 pstate->dw5.frame_size_over_flag = 1;
1677 pstate->dw5.frame_size_under_flag = 1;
1678 pstate->dw5.intra_mb_ipcm_flag = 1;
1679 pstate->dw5.mb_rate_ctrl_flag = 0;
1680 pstate->dw5.non_first_pass_flag = 0;
1681 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1682 pstate->dw5.aq_chroma_disable = 1;
/* Trellis quantization only applies with CABAC entropy coding. */
1683 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1684 pstate->dw5.aq_enable = avc_state->tq_enable;
1685 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1687 pstate->dw5.aq_rounding = 0;
/* Per-MB size limits used by the repak trigger logic. */
1690 pstate->dw6.intra_mb_max_size = 2700;
1691 pstate->dw6.inter_mb_max_size = 4095;
1693 pstate->dw8.slice_delta_qp_max0 = 0;
1694 pstate->dw8.slice_delta_qp_max1 = 0;
1695 pstate->dw8.slice_delta_qp_max2 = 0;
1696 pstate->dw8.slice_delta_qp_max3 = 0;
1698 pstate->dw9.slice_delta_qp_min0 = 0;
1699 pstate->dw9.slice_delta_qp_min1 = 0;
1700 pstate->dw9.slice_delta_qp_min2 = 0;
1701 pstate->dw9.slice_delta_qp_min3 = 0;
/* Frame bitrate min/max windows (units selected by the *_unit fields). */
1703 pstate->dw10.frame_bitrate_min = 0;
1704 pstate->dw10.frame_bitrate_min_unit = 1;
1705 pstate->dw10.frame_bitrate_min_unit_mode = 1;
1706 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1707 pstate->dw10.frame_bitrate_max_unit = 1;
1708 pstate->dw10.frame_bitrate_max_unit_mode = 1;
1710 pstate->dw11.frame_bitrate_min_delta = 0;
1711 pstate->dw11.frame_bitrate_max_delta = 0;
1713 pstate->dw12.vad_error_logic = 1;
1714 /* set parameters DW19/DW20 for slices */
/* Write one MFX_AVC_IMG_STATE (plus MI_BATCH_BUFFER_END) per PAK pass into
 * gpe_resource, patching the per-pass fields: pass 0 disables MB statistics
 * and non-first-pass behaviour, later passes enable both. Entries are spaced
 * INTEL_AVC_IMAGE_STATE_CMD_SIZE bytes apart so BRC can pick one per pass. */
1717 void gen9_avc_set_image_state(VADriverContextP ctx,
1718 struct encode_state *encode_state,
1719 struct intel_encoder_context *encoder_context,
1720 struct i965_gpe_resource *gpe_resource)
1722 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1723 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1726 unsigned int * data;
1727 struct gen9_mfx_avc_img_state cmd;
1729 pdata = i965_map_gpe_resource(gpe_resource);
/* Build the pass-independent baseline once, then patch per pass. */
1734 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1735 for (i = 0; i < generic_state->num_pak_passes; i++) {
1738 cmd.dw4.macroblock_stat_enable = 0;
1739 cmd.dw5.non_first_pass_flag = 0;
1741 cmd.dw4.macroblock_stat_enable = 1;
1742 cmd.dw5.non_first_pass_flag = 1;
1743 cmd.dw5.intra_mb_ipcm_flag = 1;
1746 cmd.dw5.mb_rate_ctrl_flag = 0;
1747 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
/* Terminate each per-pass entry with a batch-buffer end marker. */
1748 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1749 *data = MI_BATCH_BUFFER_END;
1751 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1753 i965_unmap_gpe_resource(gpe_resource);
/* Non-BRC variant: write a single MFX_AVC_IMG_STATE for the current PAK pass
 * (curr_pak_pass) instead of one entry per pass. Note non_first_pass_flag
 * stays 0 on later passes here, unlike the BRC version. */
1757 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1758 struct encode_state *encode_state,
1759 struct intel_encoder_context *encoder_context,
1760 struct i965_gpe_resource *gpe_resource)
1762 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1763 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1766 unsigned int * data;
1767 struct gen9_mfx_avc_img_state cmd;
1769 pdata = i965_map_gpe_resource(gpe_resource);
1774 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
/* First pass: no MB statistics; later passes: enable statistics. */
1776 if (generic_state->curr_pak_pass == 0) {
1777 cmd.dw4.macroblock_stat_enable = 0;
1778 cmd.dw5.non_first_pass_flag = 0;
1781 cmd.dw4.macroblock_stat_enable = 1;
1782 cmd.dw5.non_first_pass_flag = 0;
1783 cmd.dw5.intra_mb_ipcm_flag = 1;
1786 cmd.dw5.mb_rate_ctrl_flag = 0;
1787 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
/* Terminate the command sequence with a batch-buffer end marker. */
1788 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1789 *data = MI_BATCH_BUFFER_END;
1791 i965_unmap_gpe_resource(gpe_resource);
/* Gen9.5: build the per-QP trellis-quantization lambda table for the current
 * frame type, then patch sentinel entries: each table entry packs intra
 * lambda in the high 16 bits and inter lambda in the low 16 bits; the magic
 * values 0xfffa (intra) and 0xffef (inter) are replaced by 0xf000 plus a
 * rounding value chosen from the slice type / preset / user overrides. */
1796 gen95_avc_calc_lambda_table(VADriverContextP ctx,
1797 struct encode_state *encode_state,
1798 struct intel_encoder_context *encoder_context)
1800 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1801 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1802 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1803 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1804 unsigned int value, inter, intra;
1805 unsigned int rounding_value = 0;
1806 unsigned int size = 0;
1809 unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
/* Two 32-bit entries per QP level. */
1815 size = AVC_QP_MAX * 2 * sizeof(unsigned int);
1816 switch (generic_state->frame_type) {
1818 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
1821 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
1824 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
/* Patch sentinel lambda values per QP and per column. */
1831 for (i = 0; i < AVC_QP_MAX ; i++) {
1832 for (col = 0; col < 2; col++) {
1833 value = *(lambda_table + i * 2 + col);
1834 intra = value >> 16;
1836 if (intra < GEN95_AVC_MAX_LAMBDA) {
1837 if (intra == 0xfffa) {
/* Sentinel: substitute default intra TQ rounding. */
1838 intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
1842 intra = intra << 16;
1843 inter = value & 0xffff;
1845 if (inter < GEN95_AVC_MAX_LAMBDA) {
1846 if (inter == 0xffef) {
/* Rounding for inter blocks: user override wins, else preset table. */
1847 if (generic_state->frame_type == SLICE_TYPE_P) {
1848 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
1849 rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
1851 rounding_value = avc_state->rounding_inter_p;
1852 } else if (generic_state->frame_type == SLICE_TYPE_B) {
/* B frames used as references get their own rounding table. */
1853 if (pic_param->pic_fields.bits.reference_pic_flag) {
1854 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
1855 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
1857 rounding_value = avc_state->rounding_inter_b_ref;
1859 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
1860 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
1862 rounding_value = avc_state->rounding_inter_b;
1866 inter = 0xf000 + rounding_value;
/* Repack the patched intra (high) and inter (low) halves. */
1868 *(lambda_table + i * 2 + col) = intra + inter;
/* Populate the BRC constant-data surface for the current frame type. The
 * surface is a concatenation of fixed-size tables: QP adjustment/distortion
 * thresholds, skip-value thresholds, reference-list QP map, mode/MV cost
 * table (with optional per-QP overrides), ref cost, intra scaling factors,
 * and — on KBL/GLK — lambda and FTQ25 tables.
 * NOTE(review): the `data += size` advances between sections were lost in
 * extraction; each memcpy below targets the next section of the surface. */
1874 gen9_avc_init_brc_const_data(VADriverContextP ctx,
1875 struct encode_state *encode_state,
1876 struct intel_encoder_context *encoder_context)
1878 struct i965_driver_data *i965 = i965_driver_data(ctx);
1879 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1880 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1881 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1882 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1884 struct i965_gpe_resource *gpe_resource = NULL;
1885 unsigned char * data = NULL;
1886 unsigned char * data_tmp = NULL;
1887 unsigned int size = 0;
1888 unsigned int table_idx = 0;
1889 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
1892 struct object_surface *obj_surface;
1893 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
1894 VASurfaceID surface_id;
1895 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
1897 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
1898 assert(gpe_resource);
1900 i965_zero_gpe_resource(gpe_resource);
1902 data = i965_map_gpe_resource(gpe_resource);
/* Select the per-slice-type table index (I/P/B). */
1905 table_idx = slice_type_kernel[generic_state->frame_type];
1907 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
1908 size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
1909 memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
1913 /* skip threshold table*/
1915 switch (generic_state->frame_type) {
1917 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
1920 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
1923 /*SLICE_TYPE_I,no change */
/* Optional per-QP non-FTQ skip-threshold overrides (every 2nd byte). */
1927 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
1928 for (i = 0; i < AVC_QP_MAX ; i++) {
1929 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
1934 /*fill the qp for ref list*/
/* Layout: 32 bytes L0 map, 32 bytes (padding), 32 bytes L1 map, 160 bytes rest;
 * unused slots are 0xff. */
1935 size = 32 + 32 + 32 + 160;
1936 memset(data, 0xff, 32);
1937 memset(data + 32 + 32, 0xff, 32);
1938 switch (generic_state->frame_type) {
1939 case SLICE_TYPE_P: {
1940 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
1941 surface_id = slice_param->RefPicList0[i].picture_id;
1942 obj_surface = SURFACE(surface_id);
1945 *(data + i) = avc_state->list_ref_idx[0][i];//?
1949 case SLICE_TYPE_B: {
/* B slices fill the L1 map first, then rewind for the L0 map. */
1950 data = data + 32 + 32;
1951 for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
1952 surface_id = slice_param->RefPicList1[i].picture_id;
1953 obj_surface = SURFACE(surface_id);
1956 *(data + i) = avc_state->list_ref_idx[1][i];//?
1959 data = data - 32 - 32;
1961 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
1962 surface_id = slice_param->RefPicList0[i].picture_id;
1963 obj_surface = SURFACE(surface_id);
1966 *(data + i) = avc_state->list_ref_idx[0][i];//?
1971 /*SLICE_TYPE_I,no change */
1976 /*mv cost and mode cost*/
1978 memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Optional legacy intra mode cost override, one entry per QP. */
1980 if (avc_state->old_mode_cost_enable) {
1982 for (i = 0; i < AVC_QP_MAX ; i++) {
1983 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* Optional per-QP FTQ skip-threshold overrides at fixed byte offsets of
 * each 32-byte cost row. */
1988 if (avc_state->ftq_skip_threshold_lut_input_enable) {
1989 for (i = 0; i < AVC_QP_MAX ; i++) {
1990 *(data + (i * 32) + 24) =
1991 *(data + (i * 32) + 25) =
1992 *(data + (i * 32) + 27) =
1993 *(data + (i * 32) + 28) =
1994 *(data + (i * 32) + 29) =
1995 *(data + (i * 32) + 30) =
1996 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2004 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
/* Intra scaling factors: adaptive table when enabled, static otherwise. */
2009 if (avc_state->adaptive_intra_scaling_enable) {
2010 memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2012 memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
/* KBL/GLK additionally carry lambda and FTQ25 tables. */
2015 if (IS_KBL(i965->intel.device_info) ||
2016 IS_GLK(i965->intel.device_info)) {
2020 memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
2024 memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
2027 i965_unmap_gpe_resource(gpe_resource);
/* Legacy ("old") layout of the BRC constant-data surface, using the gen75
 * QP-adjustment and mode/MV cost tables. Same section order as the current
 * version but without the ref-list QP map fill, intra scaling factors, or
 * the KBL/GLK lambda/FTQ25 tables.
 * NOTE(review): the `data += size` advances between sections were lost in
 * extraction; each memcpy below targets the next section of the surface. */
2031 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2032 struct encode_state *encode_state,
2033 struct intel_encoder_context *encoder_context)
2035 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2036 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2037 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2038 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2040 struct i965_gpe_resource *gpe_resource = NULL;
2041 unsigned int * data = NULL;
2042 unsigned int * data_tmp = NULL;
2043 unsigned int size = 0;
2044 unsigned int table_idx = 0;
2045 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2046 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2049 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2050 assert(gpe_resource);
2052 i965_zero_gpe_resource(gpe_resource);
2054 data = i965_map_gpe_resource(gpe_resource);
/* Select the per-slice-type table index (I/P/B). */
2057 table_idx = slice_type_kernel[generic_state->frame_type];
2059 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2060 size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2061 memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2065 /* skip threshold table*/
2067 switch (generic_state->frame_type) {
2069 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2072 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2075 /*SLICE_TYPE_I,no change */
/* Optional per-QP non-FTQ skip-threshold overrides (every 2nd byte).
 * NOTE(review): data is unsigned int * here (vs unsigned char * in the
 * non-old variant), so this indexing steps in dwords — confirm against
 * the unextracted file. */
2079 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2080 for (i = 0; i < AVC_QP_MAX ; i++) {
2081 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2086 /*fill the qp for ref list*/
2092 /*mv cost and mode cost*/
2094 memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Optional legacy intra mode cost override, one entry per QP. */
2096 if (avc_state->old_mode_cost_enable) {
2098 for (i = 0; i < AVC_QP_MAX ; i++) {
2099 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* Optional per-QP FTQ skip-threshold overrides at fixed offsets of each
 * 32-element cost row. */
2104 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2105 for (i = 0; i < AVC_QP_MAX ; i++) {
2106 *(data + (i * 32) + 24) =
2107 *(data + (i * 32) + 25) =
2108 *(data + (i * 32) + 27) =
2109 *(data + (i * 32) + 28) =
2110 *(data + (i * 32) + 29) =
2111 *(data + (i * 32) + 30) =
2112 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2120 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2122 i965_unmap_gpe_resource(gpe_resource);
/* Fill the CURBE (kernel constant buffer) for the BRC init/reset kernel.
 * Seeds the hardware bit-rate controller with target/max bitrate, VBV
 * buffer size and fullness, GOP structure, rate-control mode flags, and
 * the per-slice-type deviation-threshold tables derived from the
 * bits-per-frame ratio. */
2125 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2126 struct encode_state *encode_state,
2127 struct i965_gpe_context *gpe_context,
2128 struct intel_encoder_context *encoder_context,
2131 gen9_avc_brc_init_reset_curbe_data *cmd;
2132 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2133 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2134 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2135 double input_bits_per_frame = 0;
2136 double bps_ratio = 0;
2137 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2138 struct avc_param common_param;
/* Map the CURBE into CPU address space and start from the static
 * init-data template. */
2140 cmd = i965_gpe_context_map_curbe(gpe_context);
2145 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
/* Gather the frame/bitrate parameters needed to compute the
 * profile/level-derived maximum frame size. */
2147 memset(&common_param, 0, sizeof(common_param));
2148 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2149 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2150 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2151 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2152 common_param.frames_per_100s = generic_state->frames_per_100s;
2153 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2154 common_param.target_bit_rate = generic_state->target_bit_rate;
2156 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2157 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2158 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
/* target_bit_rate/max_bit_rate are stored in kbps; the kernel wants bps. */
2159 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2160 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
/* Number of P frames per GOP; gop_ref_distance of 0 is guarded against
 * division by zero. gop_b is the remainder of the GOP. */
2161 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2162 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2163 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2164 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2165 cmd->dw12.no_slices = avc_state->slice_num;
2168 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
/* NOTE(review): self-assignment below has no effect as written --
 * possibly a placeholder for a HRD-derived max_bit_rate; verify
 * against the full upstream source. */
2169 cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2170 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: average and max bitrate are identical by definition. */
2171 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
/* Frame rate is carried as frames-per-100-seconds, hence the fixed
 * denominator of 100. */
2176 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2177 cmd->dw7.frame_rate_d = 100;
2178 cmd->dw8.brc_flag = 0;
/* 0x8000 disables MB-level BRC in the kernel when mb_brc is off. */
2179 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2182 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2184 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2185 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2187 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
/* VBR requires max >= average; if not, widen max to 2x average. */
2189 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2190 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2192 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2194 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2196 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2197 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2200 // ignore ICQ/VCM/QVBR rate-control modes here
2202 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2203 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
/* Average bits available per frame at the max bitrate.
 * NOTE(review): trailing ";;" is a harmless stray empty statement. */
2206 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
/* Derive defaults when the app did not supply VBV parameters:
 * buffer = 4 frames, initial fullness = 7/8 of the buffer, then clamp
 * fullness into [2 frames, buffer size]. */
2208 if (cmd->dw2.buf_size_in_bits == 0) {
2209 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2212 if (cmd->dw1.init_buf_full_in_bits == 0) {
2213 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2215 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2216 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2218 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2219 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
/* AVBR overrides the VBV model: buffer = 2 seconds worth of target
 * bitrate, initial fullness = 3/4 of that. */
2223 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2224 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2225 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
/* Ratio of per-frame bits to a 1/30th slice of the buffer, clamped to
 * [0.1, 3.5]; used as the exponent shaping the threshold tables below. */
2229 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2230 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
/* Deviation thresholds for P/B, VBR and I cases. The negative products
 * are intentionally stored through an unsigned cast; presumably the
 * kernel reinterprets them as signed -- confirm against the kernel HLD. */
2233 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2234 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2235 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2236 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2237 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2238 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2239 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2240 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2241 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2242 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2243 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2244 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2245 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2246 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2247 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2248 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2249 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2250 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2251 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2252 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2253 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2254 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2255 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2256 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2258 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2260 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the two surfaces used by the BRC init/reset kernel to the GPE
 * binding table: the BRC history buffer (raw buffer) and the ME
 * distortion surface (2D, R8_UNORM). */
2266 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2267 struct encode_state *encode_state,
2268 struct i965_gpe_context *gpe_context,
2269 struct intel_encoder_context *encoder_context,
2272 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2273 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
/* BRC history buffer: persistent rate-control state across frames. */
2275 gen9_add_buffer_gpe_surface(ctx,
2277 &avc_ctx->res_brc_history_buffer,
2279 avc_ctx->res_brc_history_buffer.size,
2281 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
/* Distortion data surface produced by the ME kernels. */
2283 gen9_add_buffer_2d_gpe_surface(ctx,
2285 &avc_ctx->res_brc_dist_data_surface,
2287 I965_SURFACEFORMAT_R8_UNORM,
2288 GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/* Dispatch the BRC init (first frame) or BRC reset (subsequent
 * re-initialization) kernel as a single MEDIA_OBJECT command.
 * Returns VA_STATUS_SUCCESS. */
2294 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2295 struct encode_state *encode_state,
2296 struct intel_encoder_context *encoder_context)
2298 struct i965_driver_data *i965 = i965_driver_data(ctx);
2299 struct i965_gpe_table *gpe = &i965->gpe_table;
2300 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2301 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2302 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2303 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2305 struct i965_gpe_context *gpe_context;
2306 struct gpe_media_object_parameter media_object_param;
2307 struct gpe_media_object_inline_data media_object_inline_data;
2308 int media_function = 0;
2309 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2311 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
/* Once BRC has been initialized, later calls run the RESET variant. */
2313 if (generic_state->brc_inited)
2314 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2316 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2318 gpe->context_init(ctx, gpe_context);
2319 gpe->reset_binding_table(ctx, gpe_context);
/* Program CURBE and binding table through the per-platform hooks. */
2321 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2323 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2325 gpe->setup_interface_data(ctx, gpe_context);
/* This kernel takes no inline payload; submit a zeroed one. */
2327 memset(&media_object_param, 0, sizeof(media_object_param));
2328 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2329 media_object_param.pinline_data = &media_object_inline_data;
2330 media_object_param.inline_size = sizeof(media_object_inline_data);
2332 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2335 &media_object_param);
2337 return VA_STATUS_SUCCESS;
/* Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness, frame type, min/max QP limits, skip-frame accounting and
 * the AVBR growth/ratio thresholds. */
2341 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2342 struct encode_state *encode_state,
2343 struct i965_gpe_context *gpe_context,
2344 struct intel_encoder_context *encoder_context,
2347 gen9_avc_frame_brc_update_curbe_data *cmd;
2348 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2349 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2350 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2351 struct object_surface *obj_surface;
2352 struct gen9_surface_avc *avc_priv_surface;
2353 struct avc_param common_param;
2354 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
/* Reconstructed surface carries per-frame private AVC state (QP,
 * reference flag, frame index). */
2356 obj_surface = encode_state->reconstructed_object;
2358 if (!obj_surface || !obj_surface->private_data)
2360 avc_priv_surface = obj_surface->private_data;
2362 cmd = i965_gpe_context_map_curbe(gpe_context);
2367 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
/* Wrap the running target fullness around the VBV buffer size; the
 * flag tells the kernel a wrap occurred. */
2369 cmd->dw5.target_size_flag = 0 ;
2370 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2372 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2373 cmd->dw5.target_size_flag = 1 ;
/* Account for frames the app skipped: report them to the kernel and
 * credit their bit budget back to the target fullness.
 * (Field name "skip_frame_enbale" is a pre-existing typo in the
 * struct declaration elsewhere; cannot be fixed here.) */
2376 if (generic_state->skip_frame_enbale) {
2377 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2378 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2380 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2383 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2384 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes converted to bits (<< 3). "herder" is a pre-existing
 * field-name typo for "header". */
2385 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2386 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2387 cmd->dw5.brc_flag = 0 ;
2388 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2390 if (avc_state->multi_pre_enable) {
2391 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2392 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2395 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Per-frame-type QP clamps, only when the app enabled min/max QP. */
2396 if (avc_state->min_max_qp_enable) {
2397 switch (generic_state->frame_type) {
2399 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2400 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2403 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2404 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2407 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2408 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
/* 0 means "no clamp" to the kernel. */
2412 cmd->dw6.minimum_qp = 0 ;
2413 cmd->dw6.maximum_qp = 0 ;
2415 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2416 cmd->dw6.enable_sliding_window = 0 ;
/* Advance the running target by one frame's bit budget. */
2418 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR-only: growth-adjust frame counts scaled by convergence, and
 * rate-ratio thresholds scaled by accuracy (both relative to their
 * nominal 150 / 30 baselines). */
2420 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2421 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2422 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2423 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2424 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2425 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2426 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2427 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2428 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2429 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2430 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2433 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
/* Recompute the level-derived max frame size for this frame. */
2435 memset(&common_param, 0, sizeof(common_param));
2436 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2437 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2438 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2439 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2440 common_param.frames_per_100s = generic_state->frames_per_100s;
2441 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2442 common_param.target_bit_rate = generic_state->target_bit_rate;
2444 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2445 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind all surfaces used by the frame-level BRC update kernel. Binding
 * table indices differ between Gen9 (SKL/BXT) and Gen9.5 (KBL/GLK),
 * selected via is_g95. The MBENC CURBE is exposed to the kernel so BRC
 * can patch per-frame QP directly into the MBENC constants. */
2451 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2452 struct encode_state *encode_state,
2453 struct i965_gpe_context *gpe_context,
2454 struct intel_encoder_context *encoder_context,
2457 struct i965_driver_data *i965 = i965_driver_data(ctx);
2458 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2459 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2460 struct brc_param * param = (struct brc_param *)param_brc ;
2461 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2462 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2463 unsigned char is_g95 = 0;
/* Platform select; presumably SKL/BXT keep is_g95 = 0 and KBL/GLK set
 * it to 1 (assignment bodies fall outside this view -- confirm). */
2465 if (IS_SKL(i965->intel.device_info) ||
2466 IS_BXT(i965->intel.device_info))
2468 else if (IS_KBL(i965->intel.device_info) ||
2469 IS_GLK(i965->intel.device_info))
2472 /* brc history buffer*/
2473 gen9_add_buffer_gpe_surface(ctx,
2475 &avc_ctx->res_brc_history_buffer,
2477 avc_ctx->res_brc_history_buffer.size,
2479 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2481 /* previous pak buffer*/
2482 gen9_add_buffer_gpe_surface(ctx,
2484 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2486 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2488 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2490 /* image state command buffer read only*/
2491 gen9_add_buffer_gpe_surface(ctx,
2493 &avc_ctx->res_brc_image_state_read_buffer,
2495 avc_ctx->res_brc_image_state_read_buffer.size,
2497 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2499 /* image state command buffer write only*/
2500 gen9_add_buffer_gpe_surface(ctx,
2502 &avc_ctx->res_brc_image_state_write_buffer,
2504 avc_ctx->res_brc_image_state_write_buffer.size,
2506 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
/* Two mechanisms for BRC->MBENC CURBE patching: a dedicated buffer
 * (Gen9.5 path, mbenc_brc_buffer_size > 0) or direct read/write access
 * to the MBENC context's dynamic-state BO (Gen9 path). */
2508 if (avc_state->mbenc_brc_buffer_size > 0) {
2509 gen9_add_buffer_gpe_surface(ctx,
2511 &(avc_ctx->res_mbenc_brc_buffer),
2513 avc_ctx->res_mbenc_brc_buffer.size,
2515 GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2517 /* Mbenc curbe input buffer */
2518 gen9_add_dri_buffer_gpe_surface(ctx,
2520 gpe_context_mbenc->dynamic_state.bo,
2522 ALIGN(gpe_context_mbenc->curbe.length, 64),
2523 gpe_context_mbenc->curbe.offset,
2524 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2525 /* Mbenc curbe output buffer */
2526 gen9_add_dri_buffer_gpe_surface(ctx,
2528 gpe_context_mbenc->dynamic_state.bo,
2530 ALIGN(gpe_context_mbenc->curbe.length, 64),
2531 gpe_context_mbenc->curbe.offset,
2532 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2535 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2536 gen9_add_buffer_2d_gpe_surface(ctx,
2538 &avc_ctx->res_brc_dist_data_surface,
2540 I965_SURFACEFORMAT_R8_UNORM,
2541 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2543 /* BRC const data 2D surface buffer */
2544 gen9_add_buffer_2d_gpe_surface(ctx,
2546 &avc_ctx->res_brc_const_data_buffer,
2548 I965_SURFACEFORMAT_R8_UNORM,
2549 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2551 /* MB statistical data surface*/
2552 gen9_add_buffer_gpe_surface(ctx,
2554 &avc_ctx->res_mb_status_buffer,
2556 avc_ctx->res_mb_status_buffer.size,
2558 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
/* Run the frame-level BRC update kernel. First programs the MBENC
 * CURBE (so the BRC kernel can patch QP into it), then sets up the
 * BRC-update CURBE, constant data, image state and surfaces, and
 * finally dispatches a single MEDIA_OBJECT. Returns VA_STATUS_SUCCESS. */
2564 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
2565 struct encode_state *encode_state,
2566 struct intel_encoder_context *encoder_context)
2569 struct i965_driver_data *i965 = i965_driver_data(ctx);
2570 struct i965_gpe_table *gpe = &i965->gpe_table;
2571 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2572 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2573 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2574 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2575 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2577 struct i965_gpe_context *gpe_context = NULL;
2578 struct gpe_media_object_parameter media_object_param;
2579 struct gpe_media_object_inline_data media_object_inline_data;
2580 int media_function = 0;
2582 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
2583 unsigned int brc_enabled = 0;
2584 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* "&& (0)" deliberately disables dirty-ROI for now. */
2585 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
2587 /* the following set the mbenc curbe*/
2588 struct mbenc_param curbe_mbenc_param ;
2589 struct brc_param curbe_brc_param ;
2591 mb_const_data_buffer_in_use =
2592 generic_state->mb_brc_enabled ||
2595 avc_state->mb_qp_data_enable ||
2596 avc_state->rolling_intra_refresh_enable;
2597 mb_qp_buffer_in_use =
2598 generic_state->mb_brc_enabled ||
2599 generic_state->brc_roi_enable ||
2600 avc_state->mb_qp_data_enable;
/* Pick the MBENC kernel variant by quality preset; the per-frame-type
 * offset is added below. */
2602 switch (generic_state->kernel_mode) {
2603 case INTEL_ENC_KERNEL_NORMAL : {
2604 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
2607 case INTEL_ENC_KERNEL_PERFORMANCE : {
2608 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
2611 case INTEL_ENC_KERNEL_QUALITY : {
2612 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* Offset from the I-frame kernel to the P/B variant. */
2620 if (generic_state->frame_type == SLICE_TYPE_P) {
2622 } else if (generic_state->frame_type == SLICE_TYPE_B) {
2626 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
2627 gpe->context_init(ctx, gpe_context);
2629 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
2631 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
2632 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
2633 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
2634 curbe_mbenc_param.brc_enabled = brc_enabled;
2635 curbe_mbenc_param.roi_enabled = roi_enable;
2637 /* set curbe mbenc*/
2638 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
2640 // gen95 set curbe out of the brc. gen9 do it here
2641 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
2642 /*begin brc frame update*/
2643 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
/* Hand the MBENC context to the BRC kernel so it can patch the MBENC
 * CURBE, then switch gpe_context to the BRC frame-update kernel. */
2644 curbe_brc_param.gpe_context_mbenc = gpe_context;
2645 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
2646 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
2647 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2648 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
2650 gpe->context_init(ctx, gpe_context);
2651 gpe->reset_binding_table(ctx, gpe_context);
2652 /*brc copy ignored*/
2654 /* set curbe frame update*/
2655 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2657 /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
2658 if (avc_state->multi_pre_enable) {
2659 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
2661 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
2663 /* image state construct*/
2664 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
2665 /* set surface frame mbenc*/
2666 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2669 gpe->setup_interface_data(ctx, gpe_context);
/* No inline payload for this kernel. */
2671 memset(&media_object_param, 0, sizeof(media_object_param));
2672 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2673 media_object_param.pinline_data = &media_object_inline_data;
2674 media_object_param.inline_size = sizeof(media_object_inline_data);
2676 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2679 &media_object_param);
2681 return VA_STATUS_SUCCESS;
/* Fill the CURBE for the MB-level BRC update kernel: only the current
 * frame type and the ROI enable flag are programmed; the rest stays
 * zeroed. */
2685 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2686 struct encode_state *encode_state,
2687 struct i965_gpe_context *gpe_context,
2688 struct intel_encoder_context *encoder_context,
2691 gen9_avc_mb_brc_curbe_data *cmd;
2692 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2693 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2695 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Unlike the other CURBE setters this one starts from zero, not from a
 * static init-data template. */
2700 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2702 cmd->dw0.cur_frame_type = generic_state->frame_type;
2703 if (generic_state->brc_roi_enable) {
2704 cmd->dw0.enable_roi = 1;
2706 cmd->dw0.enable_roi = 0;
2709 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces for the MB-level BRC update kernel: history buffer,
 * per-MB QP surface (only when MB BRC is on), ROI surface (only when
 * BRC ROI is on) and the MB status buffer. */
2715 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2716 struct encode_state *encode_state,
2717 struct i965_gpe_context *gpe_context,
2718 struct intel_encoder_context *encoder_context,
2721 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2722 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2723 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2725 /* brc history buffer*/
2726 gen9_add_buffer_gpe_surface(ctx,
2728 &avc_ctx->res_brc_history_buffer,
2730 avc_ctx->res_brc_history_buffer.size,
2732 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2734 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2735 if (generic_state->mb_brc_enabled) {
2736 gen9_add_buffer_2d_gpe_surface(ctx,
2738 &avc_ctx->res_mbbrc_mb_qp_data_surface,
2740 I965_SURFACEFORMAT_R8_UNORM,
2741 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2745 /* BRC roi feature*/
2746 if (generic_state->brc_roi_enable) {
2747 gen9_add_buffer_gpe_surface(ctx,
2749 &avc_ctx->res_mbbrc_roi_surface,
2751 avc_ctx->res_mbbrc_roi_surface.size,
2753 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2757 /* MB statistical data surface*/
2758 gen9_add_buffer_gpe_surface(ctx,
2760 &avc_ctx->res_mb_status_buffer,
2762 avc_ctx->res_mb_status_buffer.size,
2764 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/* Dispatch the MB-level BRC update kernel with a media-object walker
 * covering the frame. Returns VA_STATUS_SUCCESS. */
2770 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2771 struct encode_state *encode_state,
2772 struct intel_encoder_context *encoder_context)
2775 struct i965_driver_data *i965 = i965_driver_data(ctx);
2776 struct i965_gpe_table *gpe = &i965->gpe_table;
2777 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2778 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2779 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2780 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2782 struct i965_gpe_context *gpe_context;
2783 struct gpe_media_object_walker_parameter media_object_walker_param;
2784 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2785 int media_function = 0;
2788 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2789 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2790 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2792 gpe->context_init(ctx, gpe_context);
2793 gpe->reset_binding_table(ctx, gpe_context);
2795 /* set curbe brc mb update*/
2796 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2799 /* set surface brc mb update*/
2800 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2803 gpe->setup_interface_data(ctx, gpe_context);
2805 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2806 /* the scaling is based on 8x8 blk level */
/* Each walker thread covers a 2x2 group of MBs, hence the rounded-up
 * halving of the MB dimensions. */
2807 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2808 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
/* No inter-thread dependency: threads may run in any order. */
2809 kernel_walker_param.no_dependency = 1;
2811 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2813 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2816 &media_object_walker_param);
2818 return VA_STATUS_SUCCESS;
2822 mbenc kernel related functions, including the intra distortion kernel
/* Derive the bi-prediction weight used by the VME from the implicit
 * weighted-prediction distance scale factor of ref list0/idx0.
 * Falls back to 32 (equal weighting) whenever implicit WP is not in
 * effect or the derived weight is not one of the values the VME
 * hardware supports. */
2825 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2827 int biweight = 32; // default value
2829 /* based on kernel HLD*/
2830 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
/* Implicit mode: weight = (DistScaleFactor + 2) >> 2. */
2833 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
/* VME only supports the discrete weights {16, 21, 32, 43, 48}. */
2835 if (biweight != 16 && biweight != 21 &&
2836 biweight != 32 && biweight != 43 && biweight != 48) {
2837 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
/* Compute the H.264 implicit weighted-prediction DistScaleFactor for
 * each active list0 reference (per spec clause 8.4.2.3.2:
 * tx = (16384 + |td/2|) / td, DistScaleFactor = clip3(-1024, 1023,
 * (tb * tx + 32) >> 6)), storing the results in
 * avc_state->dist_scale_factor_list0. POC distances are derived from
 * TopFieldOrderCnt and clipped to [-128, 127]. */
2845 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2846 struct encode_state *encode_state,
2847 struct intel_encoder_context *encoder_context)
2849 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2850 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2851 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2852 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2854 int max_num_references;
2855 VAPictureH264 *curr_pic;
2856 VAPictureH264 *ref_pic_l0;
2857 VAPictureH264 *ref_pic_l1;
2866 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2868 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2869 curr_pic = &pic_param->CurrPic;
2870 for (i = 0; i < max_num_references; i++) {
2871 ref_pic_l0 = &(slice_param->RefPicList0[i]);
2873 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2874 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2876 ref_pic_l1 = &(slice_param->RefPicList1[0]);
/* NOTE(review): the check below re-tests ref_pic_l0 although
 * ref_pic_l1 was just fetched -- looks like a copy-paste slip;
 * presumably ref_pic_l1 was intended. Verify against upstream. */
2877 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2878 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
/* tb = POC(cur) - POC(ref0), td = POC(ref1) - POC(ref0). */
2881 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2882 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2883 CLIP(poc0, -128, 127);
2884 CLIP(poc1, -128, 127);
/* Rounded division: tx = (16384 + |td/2|) / td. */
2891 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2892 tx = (16384 + tmp) / td ;
2893 tmp = (tb * tx + 32) >> 6;
2894 CLIP(tmp, -1024, 1023);
2895 avc_state->dist_scale_factor_list0[i] = tmp;
/* Return the QP that was used to encode a reference picture, looked up
 * by reference-list index. The QP is read from the reference surface's
 * private per-surface AVC state (qp_value). */
2901 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2902 VAEncSliceParameterBufferH264 *slice_param,
2906 struct i965_driver_data *i965 = i965_driver_data(ctx);
2907 struct object_surface *obj_surface;
2908 struct gen9_surface_avc *avc_priv_surface;
2909 VASurfaceID surface_id;
2911 assert(slice_param);
/* Bounds-check the index against the active reference count before
 * reading the list (list0 branch first, list1 below). */
2915 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2916 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2920 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2921 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2925 obj_surface = SURFACE(surface_id);
/* Only return a QP when the surface exists and carries encoder-private
 * state; the fallback path is outside this view. */
2926 if (obj_surface && obj_surface->private_data) {
2927 avc_priv_surface = obj_surface->private_data;
2928 return avc_priv_surface->qp_value;
/* Populate the MB-BRC constant-data surface consumed by the MB BRC /
 * MBENC kernels. Starts from the static per-slice-type table
 * (16 dwords x 52 QP levels) and then patches individual dwords per QP
 * according to the enabled features (old intra mode cost, P-skip bias,
 * skip thresholds, intra scaling, FTQ thresholds, trellis lambdas). */
2935 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
2936 struct encode_state *encode_state,
2937 struct intel_encoder_context *encoder_context)
2939 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2940 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2941 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2942 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2944 struct i965_gpe_resource *gpe_resource = NULL;
2945 unsigned int * data = NULL;
2946 unsigned int * data_tmp = NULL;
/* 16 dwords of constants per QP value, for all 52 AVC QP levels. */
2947 unsigned int size = 16 * 52;
2948 unsigned int table_idx = 0;
2949 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2950 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2953 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
2954 assert(gpe_resource);
2955 data = i965_map_gpe_resource(gpe_resource);
/* Select the I/P/B constant table for the current frame type. */
2958 table_idx = slice_type_kernel[generic_state->frame_type];
2960 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
2964 switch (generic_state->frame_type) {
/* I-slice patching: optionally replace dword 0 with the legacy intra
 * mode cost per QP. */
2966 for (i = 0; i < AVC_QP_MAX ; i++) {
2967 if (avc_state->old_mode_cost_enable)
2968 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* P/B-slice patching (dword offsets within each 16-dword QP entry):
 * dw3 = P-skip MV cost bias, dw9 = non-FTQ skip threshold,
 * dw10 = intra scaling factor. */
2974 for (i = 0; i < AVC_QP_MAX ; i++) {
2975 if (generic_state->frame_type == SLICE_TYPE_P) {
2976 if (avc_state->skip_bias_adjustment_enable)
2977 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
2979 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
2980 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2981 } else if (generic_state->frame_type == SLICE_TYPE_P) {
2982 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
2984 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
2987 if (avc_state->adaptive_intra_scaling_enable) {
2988 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
2990 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* Common patching for all slice types: dw6/dw7 pack the FTQ skip
 * threshold into byte lanes; dw11/dw12 hold trellis lambda values. */
3002 for (i = 0; i < AVC_QP_MAX ; i++) {
3003 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3004 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3005 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3006 (avc_state->ftq_skip_threshold_lut[i] << 24));
3007 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3008 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3009 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3010 (avc_state->ftq_skip_threshold_lut[i] << 24));
3013 if (avc_state->kernel_trellis_enable) {
3014 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3015 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3021 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_set_curbe_mbenc: populate the MbEnc kernel CURBE (constant
 * buffer) for AVC encoding.  One routine serves two hardware variants:
 * SKL/BXT use the Gen9 CURBE layout (written through cmd.g9) and KBL/GLK
 * use the Gen9.5 layout (cmd.g95).  'cmd' appears to be a union of the
 * two pointers mapped onto the same GPE-context CURBE, so writes through
 * cmd.g9 also land in the Gen9.5 buffer for the fields the layouts
 * share -- TODO confirm against the declaration of 'cmd' (not visible
 * in this excerpt).
 *
 * NOTE(review): this excerpt carries original-file line numbers as a
 * prefix on each statement and omits some lines (case labels, braces,
 * else branches).  Comments below describe only what is visible.
 */
3025 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3026 struct encode_state *encode_state,
3027 struct i965_gpe_context *gpe_context,
3028 struct intel_encoder_context *encoder_context,
3031 struct i965_driver_data *i965 = i965_driver_data(ctx);
3033 gen9_avc_mbenc_curbe_data *g9;
3034 gen95_avc_mbenc_curbe_data *g95;
/* Unpack the encoder-wide state blobs hung off the VME context. */
3036 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3037 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3038 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3040 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3041 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3042 VASurfaceID surface_id;
3043 struct object_surface *obj_surface;
3045 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3046 unsigned char qp = 0;
3047 unsigned char me_method = 0;
3048 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3049 unsigned int table_idx = 0;
3050 unsigned char is_g9 = 0;
3051 unsigned char is_g95 = 0;
3052 unsigned int curbe_size = 0;
3054 unsigned int preset = generic_state->preset;
/*
 * Map the CURBE and seed it with the per-frame-type init table for the
 * detected hardware generation (Gen9: SKL/BXT, Gen9.5: KBL/GLK).
 * Missing case labels below selected I/P/B init tables in the original.
 */
3055 if (IS_SKL(i965->intel.device_info) ||
3056 IS_BXT(i965->intel.device_info)) {
3057 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3061 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3062 memset(cmd.g9, 0, curbe_size);
3064 if (mbenc_i_frame_dist_in_use) {
3065 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3068 switch (generic_state->frame_type) {
3070 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3073 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3076 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3083 } else if (IS_KBL(i965->intel.device_info) ||
3084 IS_GLK(i965->intel.device_info)) {
3085 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3089 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
/* NOTE(review): clears via cmd.g9 after mapping cmd.g95 -- only valid if
 * cmd is a union of pointers over the same mapping; verify. */
3090 memset(cmd.g9, 0, curbe_size);
3092 if (mbenc_i_frame_dist_in_use) {
3093 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3096 switch (generic_state->frame_type) {
3098 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3101 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3104 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
/* Motion-estimation method and slice QP derived from picture + slice params. */
3113 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3114 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* dw0/dw37 carry duplicate VME search controls for the two VME calls. */
3116 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3117 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3118 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3119 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3121 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3122 cmd.g9->dw38.max_len_sp = 0;
/* Gen9.5-only field (guard for this write is among the elided lines). */
3125 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3127 cmd.g9->dw3.src_access = 0;
3128 cmd.g9->dw3.ref_access = 0;
/* FTQ (fast transform quantization) enable: per-preset for P/B frames,
 * forced off for I frames or when ftq_enable is clear. */
3130 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3131 //disable ftq_override by now.
3132 if (avc_state->ftq_override) {
3133 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3136 // both gen9 and gen95 come here by now
3137 if (generic_state->frame_type == SLICE_TYPE_P) {
3138 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3141 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3145 cmd.g9->dw3.ftq_enable = 0;
3148 if (avc_state->disable_sub_mb_partion)
3149 cmd.g9->dw3.sub_mb_part_mask = 0x7;
/* Frame geometry: 4x-downscaled dimensions when this launch is the
 * I-frame-distortion pass, full-resolution MB dimensions otherwise. */
3151 if (mbenc_i_frame_dist_in_use) {
3152 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3153 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3154 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3155 cmd.g9->dw6.batch_buffer_end = 0;
3156 cmd.g9->dw31.intra_compute_type = 1;
3159 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3160 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3161 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
/* Mode/MV cost table indexed by kernel slice type and QP (dw8..dw15). */
3164 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3165 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3166 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3167 } else if (avc_state->skip_bias_adjustment_enable) {
3168 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3169 // No need to check for P picture as the flag is only enabled for P picture */
3170 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* VME search path table (dw16..dw31): B frames use table index 1. */
3175 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3176 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3178 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3179 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3180 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3181 cmd.g9->dw4.enable_cur_fld_idr = 0;//field related
3182 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3183 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3184 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3185 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3188 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3189 cmd.g9->dw7.src_field_polarity = 0;//field related
3191 /*ftq_skip_threshold_lut set,dw14 /15*/
3193 /*r5 disable NonFTQSkipThresholdLUT*/
/* Skip-value thresholds keyed off block-based-skip and 8x8 transform. */
3194 if (generic_state->frame_type == SLICE_TYPE_P) {
3195 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3197 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3198 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3202 cmd.g9->dw13.qp_prime_y = qp;
3203 cmd.g9->dw13.qp_prime_cb = qp;
3204 cmd.g9->dw13.qp_prime_cr = qp;
3205 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
/* Multi-reference prediction enables per preset (missing case labels
 * selected among these assignment pairs in the original). */
3207 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3208 switch (gen9_avc_multi_pred[preset]) {
3210 cmd.g9->dw32.mult_pred_l0_disable = 128;
3211 cmd.g9->dw32.mult_pred_l1_disable = 128;
3214 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3215 cmd.g9->dw32.mult_pred_l1_disable = 128;
3218 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3219 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3222 cmd.g9->dw32.mult_pred_l0_disable = 1;
3223 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3229 cmd.g9->dw32.mult_pred_l0_disable = 128;
3230 cmd.g9->dw32.mult_pred_l1_disable = 128;
3233 /*field setting for dw33 34, ignored*/
/* Adaptive transform decision: thresholds live at different DWs per gen
 * (g95: dw60, g9: dw58). */
3235 if (avc_state->adaptive_transform_decision_enable) {
3236 if (generic_state->frame_type != SLICE_TYPE_I) {
3237 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3239 cmd.g95->dw60.mb_texture_threshold = 1024;
3240 cmd.g95->dw60.tx_decision_threshold = 128;
3246 cmd.g9->dw58.mb_texture_threshold = 1024;
3247 cmd.g9->dw58.tx_decision_threshold = 128;
3252 if (generic_state->frame_type == SLICE_TYPE_B) {
3253 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
/* NOTE(review): same field assigned twice; the second write was likely
 * meant for list1_ref_id1_frm_field_parity -- confirm against the
 * CURBE struct definition. */
3254 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3255 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3258 cmd.g9->dw34.b_original_bff = 0; //frame only
3259 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3260 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
/* 'mad_enable_falg' is the field's declared (misspelled) name. */
3261 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3262 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3263 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3265 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3266 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3270 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3272 if (cmd.g9->dw34.force_non_skip_check) {
3273 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3278 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3279 cmd.g9->dw38.ref_threshold = 400;
3280 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3282 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3283 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3284 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3285 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
/* I-frame-distortion pass: zero QPs and intra non-DC penalties. */
3287 if (mbenc_i_frame_dist_in_use) {
3288 cmd.g9->dw13.qp_prime_y = 0;
3289 cmd.g9->dw13.qp_prime_cb = 0;
3290 cmd.g9->dw13.qp_prime_cr = 0;
3291 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3292 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3293 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
/* Actual QPs of up to 8 L0 and 2 L1 references (dw44..dw46). */
3296 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3297 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3298 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3299 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3300 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3301 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3302 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3303 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3304 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3305 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3306 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3309 table_idx = slice_type_kernel[generic_state->frame_type];
3310 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* Per-frame-type search/skip controls: I disables skip and inter search,
 * P configures unidirectional search, B (the trailing else) adds
 * bi-prediction weights, distance scale factors and L1 reference checks. */
3312 if (generic_state->frame_type == SLICE_TYPE_I) {
3313 cmd.g9->dw0.skip_mode_enable = 0;
3314 cmd.g9->dw37.skip_mode_enable = 0;
3315 cmd.g9->dw36.hme_combine_overlap = 0;
3316 cmd.g9->dw47.intra_cost_sf = 16;
3317 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3319 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3321 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3322 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3323 cmd.g9->dw3.bme_disable_fbr = 1;
3324 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3325 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3326 cmd.g9->dw7.non_skip_zmv_added = 1;
3327 cmd.g9->dw7.non_skip_mode_added = 1;
3328 cmd.g9->dw7.skip_center_mask = 1;
3329 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3330 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3331 cmd.g9->dw36.hme_combine_overlap = 1;
3332 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3333 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3334 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3335 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3336 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3337 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3338 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3341 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3342 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3343 cmd.g9->dw3.search_ctrl = 7;
3344 cmd.g9->dw3.skip_type = 1;
3345 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3346 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3347 cmd.g9->dw7.skip_center_mask = 0xff;
3348 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3349 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3350 cmd.g9->dw36.hme_combine_overlap = 1;
/* Validate the first backward (L1) reference surface. */
3351 surface_id = slice_param->RefPicList1[0].picture_id;
3352 obj_surface = SURFACE(surface_id);
3354 WARN_ONCE("Invalid backward reference frame\n");
3357 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3359 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3360 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3361 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3362 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
/* Temporal distance scale factors for up to 8 L0 refs (dw40..dw43). */
3363 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3364 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3365 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3366 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3367 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3368 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3369 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3370 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3372 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3373 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3374 cmd.g9->dw7.non_skip_zmv_added = 1;
3375 cmd.g9->dw7.non_skip_mode_added = 1;
3378 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3379 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3380 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* Feed the kernel's block-based-skip decision back into driver state. */
3384 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
/* Rolling intra refresh: currently programmed as disabled (zeros);
 * the Gen9.5 branch below handles the I_SQUARE + BRC special case. */
3386 if (avc_state->rolling_intra_refresh_enable) {
3387 /*by now disable it*/
3388 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3389 cmd.g9->dw32.mult_pred_l0_disable = 128;
3390 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3391 across one P frame to another P frame, as needed by the RollingI algo */
3393 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3394 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3395 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3399 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3400 cmd.g95->dw4.enable_intra_refresh = 0;
3401 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3402 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3403 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3405 cmd.g95->dw4.enable_intra_refresh = 1;
3406 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3407 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3408 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3409 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3410 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3415 cmd.g9->dw34.widi_intra_refresh_en = 0;
3418 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3419 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3421 /*roi set disable by now. 49-56*/
/* Up to 4 ROI rectangles (dw49..dw56); QP deltas only when BRC is off. */
3422 if (curbe_param->roi_enabled) {
3423 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3424 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3425 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3426 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3428 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3429 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3430 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3431 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3433 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3434 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3435 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3436 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3438 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3439 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3440 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3441 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3443 if (!generic_state->brc_enabled) {
/* Clamp each ROI delta-QP so qp + delta stays within [0, AVC_QP_MAX]. */
3445 tmp = generic_state->roi[0].value;
3446 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3447 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3448 tmp = generic_state->roi[1].value;
3449 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3450 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3451 tmp = generic_state->roi[2].value;
3452 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3453 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3454 tmp = generic_state->roi[3].value;
3455 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3456 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3458 cmd.g9->dw34.roi_enable_flag = 0;
/* Gen9.5 trellis quantization lambdas (dw58/dw59) with overflow caps. */
3463 if (avc_state->tq_enable) {
3464 if (generic_state->frame_type == SLICE_TYPE_I) {
3465 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3466 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3468 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3469 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3470 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3473 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3474 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3477 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3478 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3480 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3481 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3483 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3484 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3486 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3487 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
/* Binding-table surface indices, Gen9.5 layout (dw66..dw87). */
3492 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3493 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3494 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3495 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3496 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3497 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3498 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3499 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3500 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3501 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3502 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3503 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3504 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3505 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3506 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3507 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3508 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3509 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3510 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3511 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3512 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3513 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
/* Binding-table surface indices, Gen9 layout (dw64..dw85). */
3517 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3518 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3519 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3520 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3521 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3522 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3523 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3524 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3525 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3526 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3527 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3528 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3529 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3530 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3531 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3532 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3533 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3534 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3535 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3536 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3537 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3538 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3541 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_mbenc: bind every input/output GPE surface the
 * MbEnc kernel reads or writes to its binding-table slot
 * (GEN9_AVC_MBENC_*_INDEX / GEN95_AVC_MBENC_*_INDEX).
 *
 * NOTE(review): this excerpt carries original-file line numbers as a
 * prefix on each statement and omits some lines (several
 * gen9_add_*_gpe_surface argument lists are truncated, and the
 * assignments that set is_g9/is_g95 after the device checks are
 * elided).  Comments below describe only what is visible.
 */
3547 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
3548 struct encode_state *encode_state,
3549 struct i965_gpe_context *gpe_context,
3550 struct intel_encoder_context *encoder_context,
3553 struct i965_driver_data *i965 = i965_driver_data(ctx);
3554 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3555 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3556 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3557 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3558 struct object_surface *obj_surface;
3559 struct gen9_surface_avc *avc_priv_surface;
3560 struct i965_gpe_resource *gpe_resource;
3561 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
3562 VASurfaceID surface_id;
3563 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
3564 unsigned int size = 0;
3565 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
3566 generic_state->frame_height_in_mbs;
3568 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3569 unsigned char is_g95 = 0;
/* Detect hardware generation; the assignments selecting g9 vs g95 are
 * among the elided lines. */
3571 if (IS_SKL(i965->intel.device_info) ||
3572 IS_BXT(i965->intel.device_info))
3574 else if (IS_KBL(i965->intel.device_info) ||
3575 IS_GLK(i965->intel.device_info))
/* The per-surface encoder data hangs off the reconstructed surface. */
3578 obj_surface = encode_state->reconstructed_object;
3580 if (!obj_surface || !obj_surface->private_data)
3582 avc_priv_surface = obj_surface->private_data;
3584 /*pak obj command buffer output*/
/* PAK object records: 16 DWs (64 bytes) per MB. */
3585 size = frame_mb_size * 16 * 4;
3586 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3587 gen9_add_buffer_gpe_surface(ctx,
3593 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
3595 /*mv data buffer output*/
/* MV output: 32 DWs (128 bytes) per MB. */
3596 size = frame_mb_size * 32 * 4;
3597 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3598 gen9_add_buffer_gpe_surface(ctx,
3604 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
3606 /*input current YUV surface, current input Y/UV object*/
/* I-frame-distortion pass works on the 4x-downscaled surface; the
 * normal pass uses the full-resolution input YUV. */
3607 if (mbenc_i_frame_dist_in_use) {
3608 obj_surface = encode_state->reconstructed_object;
3609 if (!obj_surface || !obj_surface->private_data)
3611 avc_priv_surface = obj_surface->private_data;
3612 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3614 obj_surface = encode_state->input_yuv_object;
3616 gen9_add_2d_gpe_surface(ctx,
3621 I965_SURFACEFORMAT_R8_UNORM,
3622 GEN9_AVC_MBENC_CURR_Y_INDEX);
3624 gen9_add_2d_gpe_surface(ctx,
3629 I965_SURFACEFORMAT_R16_UINT,
3630 GEN9_AVC_MBENC_CURR_UV_INDEX);
/* When HME ran, its 4x MV predictors and distortion feed the kernel. */
3632 if (generic_state->hme_enabled) {
3634 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
3635 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3638 I965_SURFACEFORMAT_R8_UNORM,
3639 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
3640 /* memv distortion input*/
3641 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
3642 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3645 I965_SURFACEFORMAT_R8_UNORM,
3646 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
3649 /*mbbrc const data_buffer*/
/* MBBRC constant LUT: 16 DWs per QP value. */
3650 if (param->mb_const_data_buffer_in_use) {
3651 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
3652 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
3653 gen9_add_buffer_gpe_surface(ctx,
3659 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
3663 /*mb qp data_buffer*/
/* Per-MB QP: application-supplied map when mb_qp_data_enable, else the
 * MBBRC-generated map. */
3664 if (param->mb_qp_buffer_in_use) {
3665 if (avc_state->mb_qp_data_enable)
3666 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
3668 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
3669 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3672 I965_SURFACEFORMAT_R8_UNORM,
3673 GEN9_AVC_MBENC_MBQP_INDEX);
3676 /*input current YUV surface, current input Y/UV object*/
/* VME current-picture surface for the L0 inter-prediction slot. */
3677 if (mbenc_i_frame_dist_in_use) {
3678 obj_surface = encode_state->reconstructed_object;
3679 if (!obj_surface || !obj_surface->private_data)
3681 avc_priv_surface = obj_surface->private_data;
3682 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3684 obj_surface = encode_state->input_yuv_object;
3686 gen9_add_adv_gpe_surface(ctx, gpe_context,
3688 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
3689 /*input ref YUV surface*/
/* L0 reference surfaces: interleaved at odd offsets after the current
 * picture slot. */
3690 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
3691 surface_id = slice_param->RefPicList0[i].picture_id;
3692 obj_surface = SURFACE(surface_id);
3693 if (!obj_surface || !obj_surface->private_data)
3696 gen9_add_adv_gpe_surface(ctx, gpe_context,
3698 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
3700 /*input current YUV surface, current input Y/UV object*/
/* VME current-picture surface again for the L1 slot. */
3701 if (mbenc_i_frame_dist_in_use) {
3702 obj_surface = encode_state->reconstructed_object;
3703 if (!obj_surface || !obj_surface->private_data)
3705 avc_priv_surface = obj_surface->private_data;
3706 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3708 obj_surface = encode_state->input_yuv_object;
3710 gen9_add_adv_gpe_surface(ctx, gpe_context,
3712 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
/* L1 (backward) references: only the first is consumed; its MB code and
 * MV buffers are also bound as the forward-frame data inputs. */
3714 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
3715 if (i > 0) break; // only one ref supported here for B frame
3716 surface_id = slice_param->RefPicList1[i].picture_id;
3717 obj_surface = SURFACE(surface_id);
3718 if (!obj_surface || !obj_surface->private_data)
3721 gen9_add_adv_gpe_surface(ctx, gpe_context,
3723 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
3724 gen9_add_adv_gpe_surface(ctx, gpe_context,
3726 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
3728 avc_priv_surface = obj_surface->private_data;
3729 /*pak obj command buffer output(mb code)*/
3730 size = frame_mb_size * 16 * 4;
3731 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3732 gen9_add_buffer_gpe_surface(ctx,
3738 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
3740 /*mv data buffer output*/
3741 size = frame_mb_size * 32 * 4;
3742 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3743 gen9_add_buffer_gpe_surface(ctx,
3749 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
3753 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
3754 gen9_add_adv_gpe_surface(ctx, gpe_context,
3756 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
3761 /* BRC distortion data buffer for I frame*/
3762 if (mbenc_i_frame_dist_in_use) {
3763 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
3764 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3767 I965_SURFACEFORMAT_R8_UNORM,
3768 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
3771 /* as ref frame ,update later RefPicSelect of Current Picture*/
3772 obj_surface = encode_state->reconstructed_object;
3773 avc_priv_surface = obj_surface->private_data;
3774 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
3775 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
3776 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3779 I965_SURFACEFORMAT_R8_UNORM,
3780 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
/* MB statistics slot: VPP stats when available, else the flatness-check
 * surface shares the same binding index. */
3784 if (param->mb_vproc_stats_enable) {
3785 /*mb status buffer input*/
3786 size = frame_mb_size * 16 * 4;
3787 gpe_resource = &(avc_ctx->res_mb_status_buffer);
3788 gen9_add_buffer_gpe_surface(ctx,
3794 GEN9_AVC_MBENC_MB_STATS_INDEX);
3796 } else if (avc_state->flatness_check_enable) {
3798 gpe_resource = &(avc_ctx->res_flatness_check_surface);
3799 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3802 I965_SURFACEFORMAT_R8_UNORM,
3803 GEN9_AVC_MBENC_MB_STATS_INDEX);
/* MAD (mean absolute difference) buffer, zeroed before kernel launch. */
3806 if (param->mad_enable) {
3807 /*mad buffer input*/
3809 gpe_resource = &(avc_ctx->res_mad_data_buffer);
3810 gen9_add_buffer_gpe_surface(ctx,
3816 GEN9_AVC_MBENC_MAD_DATA_INDEX);
3817 i965_zero_gpe_resource(gpe_resource);
3820 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
3821 if (avc_state->mbenc_brc_buffer_size > 0) {
3822 size = avc_state->mbenc_brc_buffer_size;
3823 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
3824 gen9_add_buffer_gpe_surface(ctx,
3830 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
3833 /*arbitrary num mbs in slice*/
3834 if (avc_state->arbitrary_num_mbs_in_slice) {
3835 /*slice surface input*/
3836 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
3837 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3840 I965_SURFACEFORMAT_R8_UNORM,
3841 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
3844 /* BRC distortion data buffer for I frame */
/* Normal (non-distortion) pass only: skip map and SFD cost table. */
3845 if (!mbenc_i_frame_dist_in_use) {
3846 if (avc_state->mb_disable_skip_map_enable) {
3847 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
3848 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3851 I965_SURFACEFORMAT_R8_UNORM,
3852 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
/* Static-frame-detection cost table, distinct per P/B frame type. */
3855 if (avc_state->sfd_enable && generic_state->hme_enabled) {
3856 if (generic_state->frame_type == SLICE_TYPE_P) {
3857 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
3859 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3860 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
3863 if (generic_state->frame_type != SLICE_TYPE_I) {
3864 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3867 I965_SURFACEFORMAT_R8_UNORM,
3868 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
/*
 * Dispatch the MBENC (macroblock encoding) GPU kernel for the current frame.
 *
 * When i_frame_dist_in_use is true, the BRC I-frame distortion variant of the
 * kernel is run on the 4x-downscaled resolution instead of the full-resolution
 * MBENC kernel.  The function selects the kernel/GPE context, programs the
 * CURBE (unless BRC already did it on the GPU), binds the surfaces, and then
 * launches a media-object-walker with a frame-type dependent walking pattern.
 *
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): this listing is missing some original source lines (braces,
 * breaks, assignments); comments below describe only what is visible here.
 */
3877 gen9_avc_kernel_mbenc(VADriverContextP ctx,
3878                       struct encode_state *encode_state,
3879                       struct intel_encoder_context *encoder_context,
3880                       bool i_frame_dist_in_use)
3882     struct i965_driver_data *i965 = i965_driver_data(ctx);
3883     struct i965_gpe_table *gpe = &i965->gpe_table;
3884     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3885     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3886     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3887     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3888     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3890     struct i965_gpe_context *gpe_context;
3891     struct gpe_media_object_walker_parameter media_object_walker_param;
3892     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3893     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
3894     int media_function = 0;
3896     unsigned int mb_const_data_buffer_in_use = 0;
3897     unsigned int mb_qp_buffer_in_use = 0;
3898     unsigned int brc_enabled = 0;
3899     unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
     /* dirty ROI is hard-disabled here by the trailing "&& (0)" term */
3900     unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3901     struct mbenc_param param ;
3903     int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
3905     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
     /* Decide which optional per-MB input buffers the kernel will consume. */
3907     mb_const_data_buffer_in_use =
3908         generic_state->mb_brc_enabled ||
3911         avc_state->mb_qp_data_enable ||
3912         avc_state->rolling_intra_refresh_enable;
3913     mb_qp_buffer_in_use =
3914         generic_state->mb_brc_enabled ||
3915         generic_state->brc_roi_enable ||
3916         avc_state->mb_qp_data_enable;
     /* BRC I-frame distortion path: run on the 4x-downscaled picture. */
3918     if (mbenc_i_frame_dist_in_use) {
3919         media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
3920         kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
3921         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
3922         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
3926         gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
     /* Normal MBENC path: pick the kernel variant by quality/performance mode.
        The per-frame-type offset is added below (P/B cases partially elided). */
3928         switch (generic_state->kernel_mode) {
3929         case INTEL_ENC_KERNEL_NORMAL : {
3930             media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
3931             kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3934         case INTEL_ENC_KERNEL_PERFORMANCE : {
3935             media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
3936             kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3939         case INTEL_ENC_KERNEL_QUALITY : {
3940             media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
3941             kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
3949         if (generic_state->frame_type == SLICE_TYPE_P) {
3951         } else if (generic_state->frame_type == SLICE_TYPE_B) {
3955         downscaled_width_in_mb = generic_state->frame_width_in_mbs;
3956         downscaled_height_in_mb = generic_state->frame_height_in_mbs;
3957         mad_enable = avc_state->mad_enable;
3958         brc_enabled = generic_state->brc_enabled;
3960         gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
     /* Package per-dispatch flags for the CURBE/surface setup callbacks. */
3963     memset(&param, 0, sizeof(struct mbenc_param));
3965     param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
3966     param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
3967     param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
3968     param.mad_enable = mad_enable;
3969     param.brc_enabled = brc_enabled;
3970     param.roi_enabled = roi_enable;
3972     if (avc_state->mb_status_supported) {
3973         param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
     /* Skip CPU-side CURBE programming when the BRC update kernel already
        wrote the MBENC CURBE on the GPU. */
3976     if (!avc_state->mbenc_curbe_set_in_brc_update) {
3977         gpe->context_init(ctx, gpe_context);
3980     gpe->reset_binding_table(ctx, gpe_context);
3982     if (!avc_state->mbenc_curbe_set_in_brc_update) {
3984         generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
3987     /* MB brc const data buffer set up*/
3988     if (mb_const_data_buffer_in_use) {
3989         // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
3990         if (avc_state->lambda_table_enable)
3991             gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
3993         gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
3996     /*clear the mad buffer*/
3998     i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
4001     generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4003     gpe->setup_interface_data(ctx, gpe_context);
     /* Configure the walker: the I-frame distortion kernel has no MB
        dependencies; full MBENC walks 45/26 degrees by frame type (B frames
        with temporal direct prediction use 45 degrees). */
4006     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4008     kernel_walker_param.use_scoreboard = 1;
4009     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4010     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4011     if (mbenc_i_frame_dist_in_use) {
4012         kernel_walker_param.no_dependency = 1;
4014         switch (generic_state->frame_type) {
4016             kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4019             kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4022             kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4023             if (!slice_param->direct_spatial_mv_pred_flag) {
4024                 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4030         kernel_walker_param.no_dependency = 0;
4033     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4035     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4038                                             &media_object_walker_param);
4039     return VA_STATUS_SUCCESS;
4043 me kernel related function
/*
 * Program the CURBE (constant data) for the HME (hierarchical motion
 * estimation) kernel at one pyramid level (4x, 16x or 32x), selected by
 * curbe_param->hme_type.
 *
 * Per-level differences: whether to seed the search from the next-coarser
 * level's MVs (use_mv_from_prev_step), whether to emit distortion surfaces
 * (4x only), and MV scaling factors between levels.  For B slices the
 * bi-prediction weight, L1 ref count and the B search-path table are used.
 * NOTE(review): some original source lines are elided in this listing.
 */
4046 gen9_avc_set_curbe_me(VADriverContextP ctx,
4047                       struct encode_state *encode_state,
4048                       struct i965_gpe_context *gpe_context,
4049                       struct intel_encoder_context *encoder_context,
4052     gen9_avc_me_curbe_data *curbe_cmd;
4053     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4054     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4055     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4057     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4059     struct me_param * curbe_param = (struct me_param *)param ;
4060     unsigned char use_mv_from_prev_step = 0;
4061     unsigned char write_distortions = 0;
4062     unsigned char qp_prime_y = 0;
4063     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4064     unsigned char seach_table_idx = 0;
4065     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4066     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4067     unsigned int scale_factor = 0;
     /* Effective slice QP = picture init QP + slice delta. */
4069     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4070     switch (curbe_param->hme_type) {
4071     case INTEL_ENC_HME_4x : {
     /* Finest level: seeded by 16x MVs when 16x HME runs; the only level
        that writes distortion outputs. */
4072         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4073         write_distortions = 1;
4074         mv_shift_factor = 2;
4076         prev_mv_read_pos_factor = 0;
4079     case INTEL_ENC_HME_16x : {
4080         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4081         write_distortions = 0;
4082         mv_shift_factor = 2;
4084         prev_mv_read_pos_factor = 1;
4087     case INTEL_ENC_HME_32x : {
     /* Coarsest level: nothing coarser to seed from. */
4088         use_mv_from_prev_step = 0;
4089         write_distortions = 0;
4090         mv_shift_factor = 1;
4092         prev_mv_read_pos_factor = 0;
4099     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
     /* MB dimensions of this pyramid level (scale_factor set per hme_type
        in lines elided from this listing). */
4104     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4105     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4107     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4109     curbe_cmd->dw3.sub_pel_mode = 3;
4110     if (avc_state->field_scaling_output_interleaved) {
4111         /*frame set to zero,field specified*/
4112         curbe_cmd->dw3.src_access = 0;
4113         curbe_cmd->dw3.ref_access = 0;
4114         curbe_cmd->dw7.src_field_polarity = 0;
4116     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4117     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4118     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4120     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4121     curbe_cmd->dw6.write_distortions = write_distortions;
4122     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4123     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4125     if (generic_state->frame_type == SLICE_TYPE_B) {
     /* Equal bi-prediction weighting (32/64) and B-specific search path. */
4126         curbe_cmd->dw1.bi_weight = 32;
4127         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4128         me_method = gen9_avc_b_me_method[generic_state->preset];
4129         seach_table_idx = 1;
4132     if (generic_state->frame_type == SLICE_TYPE_P ||
4133         generic_state->frame_type == SLICE_TYPE_B)
4134         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4136     curbe_cmd->dw13.ref_streamin_cost = 5;
4137     curbe_cmd->dw13.roi_enable = 0;
4139     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4140     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
     /* Copy the 14-dword search path for this (P/B, method) combination. */
4142     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
     /* Binding-table indices for the kernel's input/output surfaces. */
4144     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4145     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4146     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4147     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4148     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4149     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4150     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4152     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces needed by the HME kernel for the level selected by
 * curbe_param->hme_type.
 *
 * For each level this binds: the MV data output buffer for that level, the
 * coarser level's MV data as input (when that level is enabled), the current
 * frame's downscaled luma as the VME source, and the L0/L1 references'
 * downscaled surfaces (interleaved with the current picture at odd binding
 * slots: base + i*2 + 1).  The 4x level additionally binds the BRC
 * distortion and ME distortion output surfaces.
 * NOTE(review): some original source lines (braces, size setup, surface
 * parameters) are elided in this listing.
 */
4157 gen9_avc_send_surface_me(VADriverContextP ctx,
4158                          struct encode_state *encode_state,
4159                          struct i965_gpe_context *gpe_context,
4160                          struct intel_encoder_context *encoder_context,
4163     struct i965_driver_data *i965 = i965_driver_data(ctx);
4165     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4166     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4167     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4168     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4170     struct object_surface *obj_surface, *input_surface;
4171     struct gen9_surface_avc *avc_priv_surface;
4172     struct i965_gpe_resource *gpe_resource;
4173     struct me_param * curbe_param = (struct me_param *)param ;
4175     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4176     VASurfaceID surface_id;
4179     /* all scaled input surface stored in reconstructed_object*/
4180     obj_surface = encode_state->reconstructed_object;
4181     if (!obj_surface || !obj_surface->private_data)
4183     avc_priv_surface = obj_surface->private_data;
4186     switch (curbe_param->hme_type) {
     /* ---- 4x HME: finest level, also outputs distortion surfaces ---- */
4187     case INTEL_ENC_HME_4x : {
4189         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4190         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4193                                        I965_SURFACEFORMAT_R8_UNORM,
4194                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
     /* 16x MV data is the seed input when 16x HME is enabled. */
4197         if (generic_state->b16xme_enabled) {
4198             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4199             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4202                                            I965_SURFACEFORMAT_R8_UNORM,
4203                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
4205         /* brc distortion output*/
4206         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
4207         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4210                                        I965_SURFACEFORMAT_R8_UNORM,
4211                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
4212         /* memv distortion output*/
4213         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4214         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4217                                        I965_SURFACEFORMAT_R8_UNORM,
4218                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
4219         /*input current down scaled YUV surface*/
4220         obj_surface = encode_state->reconstructed_object;
4221         avc_priv_surface = obj_surface->private_data;
4222         input_surface = avc_priv_surface->scaled_4x_surface_obj;
4223         gen9_add_adv_gpe_surface(ctx, gpe_context,
4225                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4226         /*input ref scaled YUV surface*/
4227         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4228             surface_id = slice_param->RefPicList0[i].picture_id;
4229             obj_surface = SURFACE(surface_id);
4230             if (!obj_surface || !obj_surface->private_data)
4232             avc_priv_surface = obj_surface->private_data;
4234             input_surface = avc_priv_surface->scaled_4x_surface_obj;
     /* References go at odd slots interleaved after the current picture. */
4236             gen9_add_adv_gpe_surface(ctx, gpe_context,
4238                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
     /* Current picture again as the base for the backward (L1) list. */
4241         obj_surface = encode_state->reconstructed_object;
4242         avc_priv_surface = obj_surface->private_data;
4243         input_surface = avc_priv_surface->scaled_4x_surface_obj;
4245         gen9_add_adv_gpe_surface(ctx, gpe_context,
4247                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4249         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4250             surface_id = slice_param->RefPicList1[i].picture_id;
4251             obj_surface = SURFACE(surface_id);
4252             if (!obj_surface || !obj_surface->private_data)
4254             avc_priv_surface = obj_surface->private_data;
4256             input_surface = avc_priv_surface->scaled_4x_surface_obj;
4258             gen9_add_adv_gpe_surface(ctx, gpe_context,
4260                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
     /* ---- 16x HME: same pattern on the 16x-downscaled surfaces ---- */
4265     case INTEL_ENC_HME_16x : {
4266         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4267         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4270                                        I965_SURFACEFORMAT_R8_UNORM,
4271                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4273         if (generic_state->b32xme_enabled) {
4274             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4275             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4278                                            I965_SURFACEFORMAT_R8_UNORM,
4279                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
4282         obj_surface = encode_state->reconstructed_object;
4283         avc_priv_surface = obj_surface->private_data;
4284         input_surface = avc_priv_surface->scaled_16x_surface_obj;
4285         gen9_add_adv_gpe_surface(ctx, gpe_context,
4287                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4289         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4290             surface_id = slice_param->RefPicList0[i].picture_id;
4291             obj_surface = SURFACE(surface_id);
4292             if (!obj_surface || !obj_surface->private_data)
4294             avc_priv_surface = obj_surface->private_data;
4296             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4298             gen9_add_adv_gpe_surface(ctx, gpe_context,
4300                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4303         obj_surface = encode_state->reconstructed_object;
4304         avc_priv_surface = obj_surface->private_data;
4305         input_surface = avc_priv_surface->scaled_16x_surface_obj;
4307         gen9_add_adv_gpe_surface(ctx, gpe_context,
4309                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4311         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4312             surface_id = slice_param->RefPicList1[i].picture_id;
4313             obj_surface = SURFACE(surface_id);
4314             if (!obj_surface || !obj_surface->private_data)
4316             avc_priv_surface = obj_surface->private_data;
4318             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4320             gen9_add_adv_gpe_surface(ctx, gpe_context,
4322                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
     /* ---- 32x HME: coarsest level, no coarser MV input ---- */
4326     case INTEL_ENC_HME_32x : {
4327         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4328         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4331                                        I965_SURFACEFORMAT_R8_UNORM,
4332                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4334         obj_surface = encode_state->reconstructed_object;
4335         avc_priv_surface = obj_surface->private_data;
4336         input_surface = avc_priv_surface->scaled_32x_surface_obj;
4337         gen9_add_adv_gpe_surface(ctx, gpe_context,
4339                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4341         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4342             surface_id = slice_param->RefPicList0[i].picture_id;
4343             obj_surface = SURFACE(surface_id);
4344             if (!obj_surface || !obj_surface->private_data)
4346             avc_priv_surface = obj_surface->private_data;
4348             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4350             gen9_add_adv_gpe_surface(ctx, gpe_context,
4352                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4355         obj_surface = encode_state->reconstructed_object;
4356         avc_priv_surface = obj_surface->private_data;
4357         input_surface = avc_priv_surface->scaled_32x_surface_obj;
4359         gen9_add_adv_gpe_surface(ctx, gpe_context,
4361                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4363         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4364             surface_id = slice_param->RefPicList1[i].picture_id;
4365             obj_surface = SURFACE(surface_id);
4366             if (!obj_surface || !obj_surface->private_data)
4368             avc_priv_surface = obj_surface->private_data;
4370             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4372             gen9_add_adv_gpe_surface(ctx, gpe_context,
4374                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/*
 * Dispatch one level of the HME kernel (4x/16x/32x per hme_type).
 *
 * Selects the P- or B-frame ME kernel variant, programs CURBE and surfaces
 * through the generic-context callbacks, and launches a dependency-free
 * media-object-walker over the downscaled MB grid.
 *
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): some original source lines (scale_factor assignments,
 * braces/breaks) are elided in this listing.
 */
4385 gen9_avc_kernel_me(VADriverContextP ctx,
4386                    struct encode_state *encode_state,
4387                    struct intel_encoder_context *encoder_context,
4390     struct i965_driver_data *i965 = i965_driver_data(ctx);
4391     struct i965_gpe_table *gpe = &i965->gpe_table;
4392     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4393     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4394     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4395     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4397     struct i965_gpe_context *gpe_context;
4398     struct gpe_media_object_walker_parameter media_object_walker_param;
4399     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4400     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4401     int media_function = 0;
4403     struct me_param param ;
4404     unsigned int scale_factor = 0;
     /* Map the HME level to its media-state id (scale_factor assignment
        elided in this listing). */
4407     case INTEL_ENC_HME_4x : {
4408         media_function = INTEL_MEDIA_STATE_4X_ME;
4412     case INTEL_ENC_HME_16x : {
4413         media_function = INTEL_MEDIA_STATE_16X_ME;
4417     case INTEL_ENC_HME_32x : {
4418         media_function = INTEL_MEDIA_STATE_32X_ME;
4427     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4428     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4430     /* I frame should not come here.*/
4431     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4432     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4434     gpe->context_init(ctx, gpe_context);
4435     gpe->reset_binding_table(ctx, gpe_context);
4438     memset(&param, 0, sizeof(param));
4439     param.hme_type = hme_type;
4440     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
4443     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4445     gpe->setup_interface_data(ctx, gpe_context);
4447     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4448     /* the scaling is based on 8x8 blk level */
4449     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4450     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4451     kernel_walker_param.no_dependency = 1;
4453     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4455     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4458                                             &media_object_walker_param);
4460     return VA_STATUS_SUCCESS;
/*
 * Program the CURBE for the weighted-prediction (WP) kernel.
 *
 * Picks the default luma weight/offset from the slice's L1 list when
 * curbe_param->ref_list_idx is non-zero, otherwise from L0 (only entry 0 of
 * each list is used here), and sets the input/output surface binding indices.
 * NOTE(review): some original source lines are elided in this listing.
 */
4467 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4468                       struct encode_state *encode_state,
4469                       struct i965_gpe_context *gpe_context,
4470                       struct intel_encoder_context *encoder_context,
4473     gen9_avc_wp_curbe_data *cmd;
4474     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4475     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4476     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4477     struct wp_param * curbe_param = (struct wp_param *)param;
4479     cmd = i965_gpe_context_map_curbe(gpe_context);
4483     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
4484     if (curbe_param->ref_list_idx) {
4485         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4486         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4488         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4489         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4492     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4493     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4495     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the input reference and output scaled surfaces for the WP kernel.
 *
 * Also records whether weighted prediction is actually usable for the chosen
 * list (weighted_ref_l0/l1_enable) based on whether the first reference in
 * RefPicList0/1 resolves to a valid surface with driver private data.
 * NOTE(review): some original source lines (braces, else branches, surface
 * parameters) are elided in this listing.
 */
4500 gen9_avc_send_surface_wp(VADriverContextP ctx,
4501                          struct encode_state *encode_state,
4502                          struct i965_gpe_context *gpe_context,
4503                          struct intel_encoder_context *encoder_context,
4506     struct i965_driver_data *i965 = i965_driver_data(ctx);
4507     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4508     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4509     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4510     struct wp_param * curbe_param = (struct wp_param *)param;
4511     struct object_surface *obj_surface;
4512     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4513     VASurfaceID surface_id;
4515     if (curbe_param->ref_list_idx) {
4516         surface_id = slice_param->RefPicList1[0].picture_id;
4517         obj_surface = SURFACE(surface_id);
4518         if (!obj_surface || !obj_surface->private_data)
4519             avc_state->weighted_ref_l1_enable = 0;
4521             avc_state->weighted_ref_l1_enable = 1;
4523         surface_id = slice_param->RefPicList0[0].picture_id;
4524         obj_surface = SURFACE(surface_id);
4525         if (!obj_surface || !obj_surface->private_data)
4526             avc_state->weighted_ref_l0_enable = 0;
4528             avc_state->weighted_ref_l0_enable = 1;
     /* NOTE(review): falls back to reference_objects[0] here — presumably
        when the list lookup above failed; confirm against the elided lines. */
4531         obj_surface = encode_state->reference_objects[0];
4534     gen9_add_adv_gpe_surface(ctx, gpe_context,
4536                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
     /* Output: per-list weighted/scaled picture owned by the encoder ctx. */
4538     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4539     gen9_add_adv_gpe_surface(ctx, gpe_context,
4541                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
/*
 * Dispatch the weighted-prediction kernel for one reference list
 * (list1_in_use selects L1 vs L0) over the full-resolution MB grid with a
 * dependency-free media-object-walker.
 *
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): some original source lines are elided in this listing.
 */
4546 gen9_avc_kernel_wp(VADriverContextP ctx,
4547                    struct encode_state *encode_state,
4548                    struct intel_encoder_context *encoder_context,
4549                    unsigned int list1_in_use)
4551     struct i965_driver_data *i965 = i965_driver_data(ctx);
4552     struct i965_gpe_table *gpe = &i965->gpe_table;
4553     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4554     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4555     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4556     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4558     struct i965_gpe_context *gpe_context;
4559     struct gpe_media_object_walker_parameter media_object_walker_param;
4560     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4561     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4562     struct wp_param param;
4564     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4566     gpe->context_init(ctx, gpe_context);
4567     gpe->reset_binding_table(ctx, gpe_context);
4569     memset(&param, 0, sizeof(param));
4570     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4572     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
4575     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4577     gpe->setup_interface_data(ctx, gpe_context);
4579     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4580     /* the scaling is based on 8x8 blk level */
4581     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4582     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4583     kernel_walker_param.no_dependency = 1;
4585     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4587     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4590                                             &media_object_walker_param);
4592     return VA_STATUS_SUCCESS;
4597 SFD (static frame detection) related functions
/*
 * Program the CURBE for the SFD (static frame detection) kernel.
 *
 * Fills thresholds that classify the frame as static (large-MV, zero-MV and
 * distortion thresholds, scaled by frame area), copies the per-QP cost table
 * for P or B frames, and sets surface binding indices.  VDEnc mode is
 * disabled here (dw0.vdenc_mode_disable = 1).
 * NOTE(review): some original source lines are elided in this listing.
 */
4600 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
4601                        struct encode_state *encode_state,
4602                        struct i965_gpe_context *gpe_context,
4603                        struct intel_encoder_context *encoder_context,
4606     gen9_avc_sfd_curbe_data *cmd;
4607     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4608     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4609     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4610     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4612     cmd = i965_gpe_context_map_curbe(gpe_context);
4616     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
4618     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
4619     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
4620     cmd->dw0.stream_in_type = 7 ;
4621     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
4622     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
4623     cmd->dw0.vdenc_mode_disable = 1 ;
4625     cmd->dw1.hme_stream_in_ref_cost = 5 ;
4626     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
     /* Effective slice QP = picture init QP + slice delta. */
4627     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
4629     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
4630     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
     /* Thresholds expressed as a percentage of the frame's MB count. */
4632     cmd->dw3.large_mv_threshold = 128 ;
4633     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
4634     cmd->dw5.zmv_threshold = 4 ;
4635     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
4636     cmd->dw7.min_dist_threshold = 10 ;
     /* Per-QP cost table selected by frame type (P vs B). */
4638     if (generic_state->frame_type == SLICE_TYPE_P) {
4639         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
4641     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4642         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
4645     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
4646     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
     /* Surface binding-table indices for the SFD kernel. */
4647     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
4648     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
4649     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
4650     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
4651     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
4653     i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces for the SFD kernel: 4x HME MV data and distortion as
 * inputs, and the SFD output buffer.
 * NOTE(review): some original source lines (sizes, surface parameters) are
 * elided in this listing.
 */
4658 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4659                           struct encode_state *encode_state,
4660                           struct i965_gpe_context *gpe_context,
4661                           struct intel_encoder_context *encoder_context,
4664     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4665     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4666     struct i965_gpe_resource *gpe_resource;
4669     /*HME mv data surface memv output 4x*/
4670     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4671     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4674                                    I965_SURFACEFORMAT_R8_UNORM,
4675                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4677     /* memv distortion */
4678     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4679     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4682                                    I965_SURFACEFORMAT_R8_UNORM,
4683                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
     /* SFD result output buffer. */
4686     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4687     gen9_add_buffer_gpe_surface(ctx,
4693                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
/*
 * Dispatch the SFD (static frame detection) kernel as a single media object
 * (no walker — the kernel runs once per frame with inline data).
 *
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): some original source lines are elided in this listing.
 */
4698 gen9_avc_kernel_sfd(VADriverContextP ctx,
4699                     struct encode_state *encode_state,
4700                     struct intel_encoder_context *encoder_context)
4702     struct i965_driver_data *i965 = i965_driver_data(ctx);
4703     struct i965_gpe_table *gpe = &i965->gpe_table;
4704     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4705     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4706     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4708     struct i965_gpe_context *gpe_context;
4709     struct gpe_media_object_parameter media_object_param;
4710     struct gpe_media_object_inline_data media_object_inline_data;
4711     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4712     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4714     gpe->context_init(ctx, gpe_context);
4715     gpe->reset_binding_table(ctx, gpe_context);
4718     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4721     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4723     gpe->setup_interface_data(ctx, gpe_context);
     /* Zeroed inline data; the kernel takes no per-object arguments. */
4725     memset(&media_object_param, 0, sizeof(media_object_param));
4726     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4727     media_object_param.pinline_data = &media_object_inline_data;
4728     media_object_param.inline_size = sizeof(media_object_inline_data);
4730     gen9_avc_run_kernel_media_object(ctx, encoder_context,
4733                                      &media_object_param);
4735     return VA_STATUS_SUCCESS;
4739 kernel related functions: init/destroy etc.
/*
 * Initialize the GPE contexts and load the binaries for the 4x and 2x
 * scaling kernels.
 *
 * The 4x kernel's CURBE/inline-data sizes are platform dependent
 * (SKL/BXT use the gen9 layout, KBL/GLK the gen95 layout); the 2x kernel
 * uses the common gen9 2x CURBE with no inline data.
 * NOTE(review): some original source lines (load_kernels arguments, braces)
 * are elided in this listing.
 */
4742 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4743                              struct generic_encoder_context *generic_context,
4744                              struct gen_avc_scaling_context *kernel_context)
4746     struct i965_driver_data *i965 = i965_driver_data(ctx);
4747     struct i965_gpe_table *gpe = &i965->gpe_table;
4748     struct i965_gpe_context *gpe_context = NULL;
4749     struct encoder_kernel_parameter kernel_param ;
4750     struct encoder_scoreboard_parameter scoreboard_param;
4751     struct i965_kernel common_kernel;
     /* Platform-specific 4x scaling CURBE layout. */
4753     if (IS_SKL(i965->intel.device_info) ||
4754         IS_BXT(i965->intel.device_info)) {
4755         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4756         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4757     } else if (IS_KBL(i965->intel.device_info) ||
4758                IS_GLK(i965->intel.device_info)) {
4759         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4760         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4763     /* 4x scaling kernel*/
4764     kernel_param.sampler_size = 0;
4766     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4767     scoreboard_param.mask = 0xFF;
4768     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4769     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4770     scoreboard_param.walkpat_flag = 0;
4772     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4773     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4774     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4776     memset(&common_kernel, 0, sizeof(common_kernel));
     /* Locate the 4x scaling kernel binary inside the combined kernel blob. */
4778     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4779                                          generic_context->enc_kernel_size,
4780                                          INTEL_GENERIC_ENC_SCALING4X,
4784     gpe->load_kernels(ctx,
4789     /*2x scaling kernel*/
4790     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4791     kernel_param.inline_data_size = 0;
4792     kernel_param.sampler_size = 0;
4794     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4795     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4796     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4798     memset(&common_kernel, 0, sizeof(common_kernel));
4800     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4801                                          generic_context->enc_kernel_size,
4802                                          INTEL_GENERIC_ENC_SCALING2X,
4806     gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts for the HME (motion estimation) kernels.
 * The loop initializes two ME contexts (presumably 4x/16x HME levels —
 * TODO confirm against the full source) with identical CURBE size,
 * scoreboard configuration, and the shared ME kernel binary.
 */
4814 gen9_avc_kernel_init_me(VADriverContextP ctx,
4815 struct generic_encoder_context *generic_context,
4816 struct gen_avc_me_context *kernel_context)
4818 struct i965_driver_data *i965 = i965_driver_data(ctx);
4819 struct i965_gpe_table *gpe = &i965->gpe_table;
4820 struct i965_gpe_context *gpe_context = NULL;
4821 struct encoder_kernel_parameter kernel_param ;
4822 struct encoder_scoreboard_parameter scoreboard_param;
4823 struct i965_kernel common_kernel;
4826 kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4827 kernel_param.inline_data_size = 0;
4828 kernel_param.sampler_size = 0;
4830 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4831 scoreboard_param.mask = 0xFF;
4832 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4833 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4834 scoreboard_param.walkpat_flag = 0;
/* Two ME GPE contexts share the same kernel and parameters. */
4836 for (i = 0; i < 2; i++) {
4837 gpe_context = &kernel_context->gpe_contexts[i];
4838 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4839 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4841 memset(&common_kernel, 0, sizeof(common_kernel));
4843 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4844 generic_context->enc_kernel_size,
4845 INTEL_GENERIC_ENC_ME,
4849 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts for the MBENC (macroblock encoding) kernels.
 * All NUM_GEN9_AVC_KERNEL_MBENC contexts share one CURBE size; the
 * platform check currently selects the same gen9 CURBE struct for both
 * branches (SKL/BXT and KBL/GLK) — intentional or a copy/paste remnant,
 * verify against the full source.
 */
4858 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4859 struct generic_encoder_context *generic_context,
4860 struct gen_avc_mbenc_context *kernel_context)
4862 struct i965_driver_data *i965 = i965_driver_data(ctx);
4863 struct i965_gpe_table *gpe = &i965->gpe_table;
4864 struct i965_gpe_context *gpe_context = NULL;
4865 struct encoder_kernel_parameter kernel_param ;
4866 struct encoder_scoreboard_parameter scoreboard_param;
4867 struct i965_kernel common_kernel;
4869 unsigned int curbe_size = 0;
4871 if (IS_SKL(i965->intel.device_info) ||
4872 IS_BXT(i965->intel.device_info)) {
4873 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4874 } else if (IS_KBL(i965->intel.device_info) ||
4875 IS_GLK(i965->intel.device_info)) {
4876 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
/* curbe_size stays 0 on unsupported platforms; the assert catches that. */
4879 assert(curbe_size > 0);
4880 kernel_param.curbe_size = curbe_size;
4881 kernel_param.inline_data_size = 0;
4882 kernel_param.sampler_size = 0;
4884 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4885 scoreboard_param.mask = 0xFF;
4886 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4887 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4888 scoreboard_param.walkpat_flag = 0;
4890 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4891 gpe_context = &kernel_context->gpe_contexts[i];
4892 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4893 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4895 memset(&common_kernel, 0, sizeof(common_kernel));
4897 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4898 generic_context->enc_kernel_size,
4899 INTEL_GENERIC_ENC_MBENC,
4903 gpe->load_kernels(ctx,
/*
 * Set up the GPE contexts for the BRC (bit-rate control) kernels.
 * Unlike the other init helpers, each BRC kernel has its own CURBE size,
 * taken from the brc_curbe_size[] lookup table indexed by kernel index.
 */
4912 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4913 struct generic_encoder_context *generic_context,
4914 struct gen_avc_brc_context *kernel_context)
4916 struct i965_driver_data *i965 = i965_driver_data(ctx);
4917 struct i965_gpe_table *gpe = &i965->gpe_table;
4918 struct i965_gpe_context *gpe_context = NULL;
4919 struct encoder_kernel_parameter kernel_param ;
4920 struct encoder_scoreboard_parameter scoreboard_param;
4921 struct i965_kernel common_kernel;
/* Per-kernel CURBE sizes, indexed by BRC kernel enum order
 * (init/reset, frame update, reset, mbenc-for-BRC, mb update —
 * TODO confirm exact mapping against NUM_GEN9_AVC_KERNEL_BRC enum). */
4924 static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4925 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4926 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4927 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4928 (sizeof(gen9_avc_mbenc_curbe_data)),
4930 (sizeof(gen9_avc_mb_brc_curbe_data))
4933 kernel_param.inline_data_size = 0;
4934 kernel_param.sampler_size = 0;
4936 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4937 scoreboard_param.mask = 0xFF;
4938 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4939 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4940 scoreboard_param.walkpat_flag = 0;
4942 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4943 kernel_param.curbe_size = brc_curbe_size[i];
4944 gpe_context = &kernel_context->gpe_contexts[i];
4945 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4946 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4948 memset(&common_kernel, 0, sizeof(common_kernel));
4950 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4951 generic_context->enc_kernel_size,
4952 INTEL_GENERIC_ENC_BRC,
4956 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context for the WP (weighted prediction) kernel.
 * Note kernel_context->gpe_contexts is a single struct here (taken by
 * address), not an array as in the scaling/ME/MBENC/BRC contexts.
 */
4965 gen9_avc_kernel_init_wp(VADriverContextP ctx,
4966 struct generic_encoder_context *generic_context,
4967 struct gen_avc_wp_context *kernel_context)
4969 struct i965_driver_data *i965 = i965_driver_data(ctx);
4970 struct i965_gpe_table *gpe = &i965->gpe_table;
4971 struct i965_gpe_context *gpe_context = NULL;
4972 struct encoder_kernel_parameter kernel_param ;
4973 struct encoder_scoreboard_parameter scoreboard_param;
4974 struct i965_kernel common_kernel;
4976 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
4977 kernel_param.inline_data_size = 0;
4978 kernel_param.sampler_size = 0;
4980 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4981 scoreboard_param.mask = 0xFF;
4982 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4983 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4984 scoreboard_param.walkpat_flag = 0;
4986 gpe_context = &kernel_context->gpe_contexts;
4987 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4988 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4990 memset(&common_kernel, 0, sizeof(common_kernel));
4992 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4993 generic_context->enc_kernel_size,
4994 INTEL_GENERIC_ENC_WP,
4998 gpe->load_kernels(ctx,
/*
 * Set up the single GPE context for the SFD (static frame detection)
 * kernel. Mirrors gen9_avc_kernel_init_wp: one context, SFD CURBE size,
 * shared scoreboard setup, then load the SFD kernel from the blob.
 */
5006 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5007 struct generic_encoder_context *generic_context,
5008 struct gen_avc_sfd_context *kernel_context)
5010 struct i965_driver_data *i965 = i965_driver_data(ctx);
5011 struct i965_gpe_table *gpe = &i965->gpe_table;
5012 struct i965_gpe_context *gpe_context = NULL;
5013 struct encoder_kernel_parameter kernel_param ;
5014 struct encoder_scoreboard_parameter scoreboard_param;
5015 struct i965_kernel common_kernel;
5017 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5018 kernel_param.inline_data_size = 0;
5019 kernel_param.sampler_size = 0;
5021 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5022 scoreboard_param.mask = 0xFF;
5023 scoreboard_param.enable = generic_context->use_hw_scoreboard;
5024 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5025 scoreboard_param.walkpat_flag = 0;
5027 gpe_context = &kernel_context->gpe_contexts;
5028 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5029 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5031 memset(&common_kernel, 0, sizeof(common_kernel));
5033 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5034 generic_context->enc_kernel_size,
5035 INTEL_GENERIC_ENC_SFD,
5039 gpe->load_kernels(ctx,
/*
 * Tear down everything the kernel-init functions created: free the VME
 * resources, then destroy every GPE context (scaling, BRC, ME, MBENC,
 * and the single WP and SFD contexts).
 */
5047 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5050 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5051 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5052 struct i965_gpe_table *gpe = &i965->gpe_table;
5056 gen9_avc_free_resources(vme_context);
5058 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5059 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5061 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5062 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5064 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5065 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5067 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5068 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
/* WP and SFD each own a single (non-array) GPE context. */
5070 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5072 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
/*
 * Refresh per-frame encoder state from the application-supplied VA-API
 * seq/pic/slice parameter buffers:
 *  - caches seq/pic params and all slice params, detects in-loop
 *    deblocking (any slice with disable_deblocking_filter_idc != 1);
 *  - derives the frame type (I/P/B) from the first slice's slice_type;
 *  - (re)derives BRC targets (bit rate, VBV fullness/size, GOP layout)
 *    on BRC init or reset;
 *  - resolves the quality preset and kernel mode;
 *  - validates BRC inputs and falls back to single-pass, zero-rate
 *    operation when they are incomplete;
 *  - computes frame sizes in MBs/pixels and the 4x/16x/32x downscaled
 *    geometry, then disables 16x/32x ME and clamps the scaled sizes when
 *    the frame is too small (INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT).
 */
5080 gen9_avc_update_parameters(VADriverContextP ctx,
5082 struct encode_state *encode_state,
5083 struct intel_encoder_context *encoder_context)
5085 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5086 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5087 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5088 VAEncSequenceParameterBufferH264 *seq_param;
5089 VAEncSliceParameterBufferH264 * slice_param;
5091 unsigned int preset = generic_state->preset;
5093 /* seq/pic/slice parameter setting */
5094 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5095 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5097 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5098 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Scan slices: any slice that does not disable deblocking enables ILDB;
 * the outer loop stops early once ILDB is known to be on. */
5101 avc_state->enable_avc_ildb = 0;
5102 avc_state->slice_num = 0;
5103 for (j = 0; j < encode_state->num_slice_params_ext && avc_state->enable_avc_ildb == 0; j++) {
5104 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
5105 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5107 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5108 assert((slice_param->slice_type == SLICE_TYPE_I) ||
5109 (slice_param->slice_type == SLICE_TYPE_SI) ||
5110 (slice_param->slice_type == SLICE_TYPE_P) ||
5111 (slice_param->slice_type == SLICE_TYPE_SP) ||
5112 (slice_param->slice_type == SLICE_TYPE_B));
5114 if (slice_param->disable_deblocking_filter_idc != 1) {
5115 avc_state->enable_avc_ildb = 1;
5118 avc_state->slice_param[i] = slice_param;
5120 avc_state->slice_num++;
5124 /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5125 seq_param = avc_state->seq_param;
5126 slice_param = avc_state->slice_param[0];
/* Frame type follows the first slice; SI maps to I. */
5128 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5130 if (slice_param->slice_type == SLICE_TYPE_I ||
5131 slice_param->slice_type == SLICE_TYPE_SI)
5132 generic_state->frame_type = SLICE_TYPE_I;
5133 else if (slice_param->slice_type == SLICE_TYPE_P)
5134 generic_state->frame_type = SLICE_TYPE_P;
5135 else if (slice_param->slice_type == SLICE_TYPE_B)
5136 generic_state->frame_type = SLICE_TYPE_B;
5137 if (profile == VAProfileH264High)
5138 avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
5140 avc_state->transform_8x8_mode_enable = 0;
/* BRC (re)initialization: derive targets from the sequence parameters. */
5143 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5144 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5145 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5146 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5147 generic_state->frames_per_100s = 3000; /* 30fps */
5150 generic_state->gop_size = seq_param->intra_period;
5151 generic_state->gop_ref_distance = seq_param->ip_period;
/* CBR pins max/min rate to the target. */
5153 if (generic_state->internal_rate_mode == VA_RC_CBR) {
5154 generic_state->max_bit_rate = generic_state->target_bit_rate;
5155 generic_state->min_bit_rate = generic_state->target_bit_rate;
5158 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
5159 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5162 generic_state->preset = encoder_context->quality_level;
5163 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
5164 generic_state->preset = INTEL_PRESET_RT_SPEED;
5166 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5168 if (!generic_state->brc_inited) {
5169 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5170 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5171 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5172 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5176 generic_state->curr_pak_pass = 0;
5177 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5179 if (generic_state->internal_rate_mode == VA_RC_CBR ||
5180 generic_state->internal_rate_mode == VA_RC_VBR)
5181 generic_state->brc_enabled = 1;
5183 generic_state->brc_enabled = 0;
/* BRC needs complete rate-control inputs; otherwise disable it. */
5185 if (generic_state->brc_enabled &&
5186 (!generic_state->init_vbv_buffer_fullness_in_bit ||
5187 !generic_state->vbv_buffer_size_in_bit ||
5188 !generic_state->max_bit_rate ||
5189 !generic_state->target_bit_rate ||
5190 !generic_state->frames_per_100s)) {
5191 WARN_ONCE("Rate control parameter is required for BRC\n");
5192 generic_state->brc_enabled = 0;
5195 if (!generic_state->brc_enabled) {
5196 generic_state->target_bit_rate = 0;
5197 generic_state->max_bit_rate = 0;
5198 generic_state->min_bit_rate = 0;
5199 generic_state->init_vbv_buffer_fullness_in_bit = 0;
5200 generic_state->vbv_buffer_size_in_bit = 0;
5201 generic_state->num_pak_passes = 1;
5203 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Frame geometry: MB counts and pixel sizes, then 4x/16x/32x scaled
 * sizes (each aligned to 16) with their MB-unit equivalents. */
5207 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5208 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5209 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5210 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5212 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
5213 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
5214 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
5215 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
5217 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
5218 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
5219 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
5220 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
5222 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
5223 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
5224 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
5225 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
5227 if (generic_state->hme_supported) {
5228 generic_state->hme_enabled = 1;
5230 generic_state->hme_enabled = 0;
5233 if (generic_state->b16xme_supported) {
5234 generic_state->b16xme_enabled = 1;
5236 generic_state->b16xme_enabled = 0;
5239 if (generic_state->b32xme_supported) {
5240 generic_state->b32xme_enabled = 1;
5242 generic_state->b32xme_enabled = 0;
5244 /* disable HME/16xME if the size is too small */
5245 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5246 generic_state->b32xme_supported = 0;
5247 generic_state->b32xme_enabled = 0;
5248 generic_state->b16xme_supported = 0;
5249 generic_state->b16xme_enabled = 0;
5250 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5251 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5253 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5254 generic_state->b32xme_supported = 0;
5255 generic_state->b32xme_enabled = 0;
5256 generic_state->b16xme_supported = 0;
5257 generic_state->b16xme_enabled = 0;
5258 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5259 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5262 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5263 generic_state->b32xme_supported = 0;
5264 generic_state->b32xme_enabled = 0;
5265 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5266 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5268 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5269 generic_state->b32xme_supported = 0;
5270 generic_state->b32xme_enabled = 0;
5271 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5272 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5275 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5276 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5277 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5279 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5280 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5281 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and finalize per-frame encoding decisions:
 *  - map the VA rate-control mode to the internal mode (CBR/VBR/CQP)
 *    and configure BRC-related capabilities and PAK pass counts;
 *  - resolve BRC reset/ROI/mb-BRC interactions, CAF (all-fractional
 *    check), flatness check, adaptive transform decision, mb-status;
 *  - derive slice height, B-frame distortion scale / bi-weight,
 *    SkipBiasAdjustment for P frames, TQ settings for quality mode;
 *  - select the inter rounding value by frame type, preset, and the
 *    adaptive-rounding tables (indexed by slice QP).
 */
5287 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5288 struct encode_state *encode_state,
5289 struct intel_encoder_context *encoder_context)
5291 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5292 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5293 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5294 unsigned int rate_control_mode = encoder_context->rate_control_mode;
5295 unsigned int preset = generic_state->preset;
5296 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5297 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5299 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5301 generic_state->avbr_curracy = 30;
5302 generic_state->avbr_convergence = 150;
/* Low 7 bits carry the base RC mode; default falls back to CQP. */
5304 switch (rate_control_mode & 0x7f) {
5306 generic_state->internal_rate_mode = VA_RC_CBR;
5310 generic_state->internal_rate_mode = VA_RC_VBR;
5315 generic_state->internal_rate_mode = VA_RC_CQP;
5319 if (rate_control_mode != VA_RC_NONE &&
5320 rate_control_mode != VA_RC_CQP) {
5321 generic_state->brc_enabled = 1;
5322 generic_state->brc_distortion_buffer_supported = 1;
5323 generic_state->brc_constant_buffer_supported = 1;
5324 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5327 /*check brc parameter*/
5328 if (generic_state->brc_enabled) {
5329 avc_state->mb_qp_data_enable = 0;
5332 /*set the brc init and reset accordingly*/
5333 if (generic_state->brc_need_reset &&
5334 (generic_state->brc_distortion_buffer_supported == 0 ||
5335 rate_control_mode == VA_RC_CQP)) {
5336 generic_state->brc_need_reset = 0;// not support by CQP
5339 if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
5340 avc_state->sfd_enable = 0;
/* Clamp the BRC sliding-window size to min(fps, 60). */
5343 if (generic_state->frames_per_window_size == 0) {
5344 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5345 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
5346 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5349 if (generic_state->brc_enabled) {
5350 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5351 if (avc_state->min_max_qp_enable) {
5352 generic_state->num_pak_passes = 1;
5354 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5355 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5357 generic_state->num_pak_passes = 1;// CQP only one pass
5360 avc_state->mbenc_i_frame_dist_in_use = 0;
5361 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5363 /*ROI must enable mbbrc.*/
/* CAF: per-frame-type fractional-search enable from the preset table. */
5366 if (avc_state->caf_supported) {
5367 switch (generic_state->frame_type) {
5371 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5374 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5378 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
5379 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5380 avc_state->caf_enable = 0;
5384 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
5386 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5387 if (avc_state->flatness_check_supported) {
5388 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5390 avc_state->flatness_check_enable = 0;
5393 /* check mb_status_supported/enbale*/
5394 if (avc_state->adaptive_transform_decision_enable) {
5395 avc_state->mb_status_enable = 1;
5397 avc_state->mb_status_enable = 0;
5399 /*slice check,all the slices use the same slice height except the last slice*/
5400 avc_state->arbitrary_num_mbs_in_slice = 0;
5401 for (i = 0; i < avc_state->slice_num; i++) {
5402 assert(avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs == 0);
5403 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
5404 /*add it later for muli slices map*/
5407 if (generic_state->frame_type == SLICE_TYPE_I) {
5408 generic_state->hme_enabled = 0;
5409 generic_state->b16xme_enabled = 0;
5410 generic_state->b32xme_enabled = 0;
5413 if (generic_state->frame_type == SLICE_TYPE_B) {
5414 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
5415 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
5418 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5419 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5420 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5422 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
5423 avc_state->tq_enable = 1;
5424 avc_state->tq_rounding = 6;
5425 if (generic_state->brc_enabled) {
5426 generic_state->mb_brc_enabled = 1;
5430 //check the inter rounding
5431 avc_state->rounding_value = 0;
5432 avc_state->rounding_inter_p = 255;//default
5433 avc_state->rounding_inter_b = 255; //default
5434 avc_state->rounding_inter_b_ref = 255; //default
/* Rounding selection: adaptive tables (by slice QP) when enabled and no
 * BRC, otherwise the per-preset static tables; explicit overrides win. */
5436 if (generic_state->frame_type == SLICE_TYPE_P) {
5437 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
5438 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
5439 if (generic_state->gop_ref_distance == 1)
5440 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5442 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5444 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5448 avc_state->rounding_value = avc_state->rounding_inter_p;
5450 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5451 if (pic_param->pic_fields.bits.reference_pic_flag) {
5452 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5453 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5455 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5457 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
5458 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5459 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5461 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5463 avc_state->rounding_value = avc_state->rounding_inter_b;
5467 return VA_STATUS_SUCCESS;
/*
 * Bind all per-frame surfaces/buffers to GPE resources before running
 * the VME kernels:
 *  - reconstructed surface: allocate/check NV12 bo, create the private
 *    AVC surface data, hook its top/bottom DMV buffers into the last two
 *    slots of res_direct_mv_buffersr, record POC/frame-idx/qp;
 *  - input YUV surface and every active reference surface mapped to 2D
 *    GPE resources (references also contribute their DMV buffers and
 *    top-field POCs, indexed by frame_store_id);
 *  - coded buffer: wrap the bo, set start/end offsets, reset the coded
 *    buffer segment header and the status buffer region;
 *  - build list_ref_idx[0]/[1] by matching RefPicList entries against
 *    encode_state->reference_objects, honoring per-slice
 *    num_ref_idx_active_override_flag, with bounds validation.
 * NOTE(review): several intermediate lines are elided in this excerpt
 * (e.g. bo mapping before bo->virtual is read) — verify against the
 * full source.
 */
5471 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
5472 struct encode_state *encode_state,
5473 struct intel_encoder_context *encoder_context)
5476 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5477 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5478 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5479 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5480 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5482 struct object_surface *obj_surface;
5483 struct object_buffer *obj_buffer;
5484 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5485 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
5486 struct i965_coded_buffer_segment *coded_buffer_segment;
5488 struct gen9_surface_avc *avc_priv_surface;
5490 struct avc_surface_param surface_param;
5492 unsigned char * pdata;
5494 /* Setup current reconstruct frame */
5495 obj_surface = encode_state->reconstructed_object;
5496 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5498 if (va_status != VA_STATUS_SUCCESS)
5501 memset(&surface_param, 0, sizeof(surface_param));
5502 surface_param.frame_width = generic_state->frame_width_in_pixel;
5503 surface_param.frame_height = generic_state->frame_height_in_pixel;
5504 va_status = gen9_avc_init_check_surfaces(ctx,
5508 if (va_status != VA_STATUS_SUCCESS)
5511 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
5512 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
/* The current frame's DMV buffers occupy the last two slots. */
5513 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
5514 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
5515 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
5516 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
5517 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
5518 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
5519 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5520 avc_priv_surface->frame_store_id = 0;
5521 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
5522 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
5523 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
5524 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
5525 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
5527 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
5528 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
5530 /* input YUV surface*/
5531 obj_surface = encode_state->input_yuv_object;
5532 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5534 if (va_status != VA_STATUS_SUCCESS)
5536 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
5537 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
5539 /* Reference surfaces */
5540 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
5541 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
5542 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
5543 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
5544 obj_surface = encode_state->reference_objects[i];
5545 avc_state->top_field_poc[2 * i] = 0;
5546 avc_state->top_field_poc[2 * i + 1] = 0;
5548 if (obj_surface && obj_surface->bo) {
5549 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
5551 /* actually it should be handled when it is reconstructed surface*/
5552 va_status = gen9_avc_init_check_surfaces(ctx,
5553 obj_surface, encoder_context,
5555 if (va_status != VA_STATUS_SUCCESS)
5557 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
5558 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
5559 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
5560 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
5561 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
5562 avc_priv_surface->frame_store_id = i;
5568 /* Encoded bitstream ?*/
5569 obj_buffer = encode_state->coded_buf_object;
5570 bo = obj_buffer->buffer_store->bo;
5571 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
5572 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
5573 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
5574 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
5577 avc_ctx->status_buffer.bo = bo;
5579 /* set the internal flag to 0 to indicate the coded size is unknown */
5581 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5582 coded_buffer_segment->mapped = 0;
5583 coded_buffer_segment->codec = encoder_context->codec;
5584 coded_buffer_segment->status_support = 1;
5586 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
5587 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
5590 //frame id, it is the ref pic id in the reference_objects list.
5591 avc_state->num_refs[0] = 0;
5592 avc_state->num_refs[1] = 0;
5593 if (generic_state->frame_type == SLICE_TYPE_P) {
5594 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
5596 if (slice_param->num_ref_idx_active_override_flag)
5597 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
5598 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5599 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
5600 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
5602 if (slice_param->num_ref_idx_active_override_flag) {
5603 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
5604 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
5608 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
5609 return VA_STATUS_ERROR_INVALID_VALUE;
5610 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
5611 return VA_STATUS_ERROR_INVALID_VALUE;
/* Resolve RefPicList0 surface ids to indices in reference_objects. */
5613 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
5614 VAPictureH264 *va_pic;
5616 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
5617 avc_state->list_ref_idx[0][i] = 0;
5619 if (i >= avc_state->num_refs[0])
5622 va_pic = &slice_param->RefPicList0[i];
5624 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
5625 obj_surface = encode_state->reference_objects[j];
5629 obj_surface->base.id == va_pic->picture_id) {
5631 assert(obj_surface->base.id != VA_INVALID_SURFACE);
5632 avc_state->list_ref_idx[0][i] = j;
/* Same resolution for RefPicList1 (B frames). */
5638 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
5639 VAPictureH264 *va_pic;
5641 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
5642 avc_state->list_ref_idx[1][i] = 0;
5644 if (i >= avc_state->num_refs[1])
5647 va_pic = &slice_param->RefPicList1[i];
5649 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
5650 obj_surface = encode_state->reference_objects[j];
5654 obj_surface->base.id == va_pic->picture_id) {
5656 assert(obj_surface->base.id != VA_INVALID_SURFACE);
5657 avc_state->list_ref_idx[1][i] = j;
5664 return VA_STATUS_SUCCESS;
/* Per-frame GPE kernel init hook: intentionally a no-op for gen9 AVC VME;
 * always reports success. (Return type is on an elided preceding line.) */
5668 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5669 struct encode_state *encode_state,
5670 struct intel_encoder_context *encoder_context)
5672 return VA_STATUS_SUCCESS;
/* Post-kernel bookkeeping after all VME kernels of a frame have been
 * submitted: latch BRC state so the next frame skips init/reset. */
5676 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5677 struct encode_state *encode_state,
5678 struct intel_encoder_context *encoder_context)
5681 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5682 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5683 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5685 /*set this flag when all kernel is finished*/
5686 if (generic_state->brc_enabled) {
5687 generic_state->brc_inited = 1;
5688 generic_state->brc_need_reset = 0;
/* MbEnc CURBE will be re-programmed by the driver (not by BRC update)
 * on the next frame. */
5689 avc_state->mbenc_curbe_set_in_brc_update = 0;
5691 return VA_STATUS_SUCCESS;
/* Submits the full VME kernel sequence for one frame, in order:
 * BRC init/reset -> scaling (4x/16x/32x) -> HME (coarsest first) ->
 * SFD -> BRC frame/MB update (with optional I-frame distortion MbEnc) ->
 * weighted prediction -> final MbEnc.
 * NOTE(review): several closing braces/else branches fall on elided lines. */
5695 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
5696 struct encode_state *encode_state,
5697 struct intel_encoder_context *encoder_context)
5699 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5700 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5701 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5703 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
5704 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
5707 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
5708 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5709 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
/* Downscale the source for each supported HME level (4x always first). */
5713 if (generic_state->hme_supported) {
5714 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
5715 if (generic_state->b16xme_supported) {
5716 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
5717 if (generic_state->b32xme_supported) {
5718 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
/* Run HME coarsest-to-finest so each level can seed the next. */
5724 if (generic_state->hme_enabled) {
5725 if (generic_state->b16xme_enabled) {
5726 if (generic_state->b32xme_enabled) {
5727 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
5729 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
5731 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
5734 /*call SFD kernel after HME in same command buffer*/
/* Static-frame detection only when HME ran and per-MB SFD is off. */
5735 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
5736 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
5738 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
5741 /* BRC and MbEnc are included in the same task phase*/
5742 if (generic_state->brc_enabled) {
/* Optional MbEnc pass in I-frame-distortion mode feeds BRC. */
5743 if (avc_state->mbenc_i_frame_dist_in_use) {
5744 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
5746 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
5748 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
5749 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
5753 /*weight prediction,disable by now */
5754 avc_state->weighted_ref_l0_enable = 0;
5755 avc_state->weighted_ref_l1_enable = 0;
5756 if (avc_state->weighted_prediction_supported &&
5757 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
5758 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
5759 if (slice_param->luma_weight_l0_flag & 1) {
5760 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
5762 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
5763 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
5766 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
5767 if (slice_param->luma_weight_l1_flag & 1) {
5768 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
5769 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
5770 (slice_param->chroma_weight_l0_flag & 1) ||
5771 (slice_param->chroma_weight_l1_flag & 1))) {
5772 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
/* Final full MbEnc pass (not I-frame-distortion mode). */
5778 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
5780 /*ignore the reset vertical line kernel*/
5782 return VA_STATUS_SUCCESS;
/* Top-level VME pipeline entry: update/check parameters, allocate
 * resources, prepare/init/run the GPE kernels, then finalize.
 * NOTE(review): the early-exit after each failed status check is on an
 * elided line (presumably `return va_status;`) — confirm against the
 * full file. */
5786 gen9_avc_vme_pipeline(VADriverContextP ctx,
5788 struct encode_state *encode_state,
5789 struct intel_encoder_context *encoder_context)
5793 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5795 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5796 if (va_status != VA_STATUS_SUCCESS)
5799 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5800 if (va_status != VA_STATUS_SUCCESS)
5803 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5804 if (va_status != VA_STATUS_SUCCESS)
5807 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5808 if (va_status != VA_STATUS_SUCCESS)
5811 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5812 if (va_status != VA_STATUS_SUCCESS)
5815 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5817 return VA_STATUS_SUCCESS;
/* Destroys the VME context: tears down the kernels and frees the codec
 * state objects.
 * NOTE(review): only free(generic_state) is visible here; the frees for
 * generic_ctx, avc_ctx, avc_state and vme_context itself presumably sit
 * on the elided lines (5838-5848) — verify nothing leaks. */
5821 gen9_avc_vme_context_destroy(void * context)
5823 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5824 struct generic_encoder_context *generic_ctx;
5825 struct i965_avc_encoder_context *avc_ctx;
5826 struct generic_enc_codec_state *generic_state;
5827 struct avc_enc_state *avc_state;
5832 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5833 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5834 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5835 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5837 gen9_avc_kernel_destroy(vme_context);
5841 free(generic_state);
/* One-time kernel setup: loads every GPE kernel context (scaling, BRC,
 * ME, MbEnc, WP, SFD) and wires up the gen9 CURBE/surface callback table,
 * then overrides the 4x-scaling CURBE hook per platform generation. */
5849 gen9_avc_kernel_init(VADriverContextP ctx,
5850 struct intel_encoder_context *encoder_context)
5852 struct i965_driver_data *i965 = i965_driver_data(ctx);
5853 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5854 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5855 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5857 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
5858 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
5859 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
5860 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
5861 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
5862 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* CURBE-programming callbacks. */
5865 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5866 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5867 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5868 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5869 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5870 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5871 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5872 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5873 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
/* Surface-binding callbacks. */
5875 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5876 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5877 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5878 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5879 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5880 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5881 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5882 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
/* SKL/BXT keep the gen9 4x-scaling CURBE; KBL/GLK use the gen9.5 one. */
5884 if (IS_SKL(i965->intel.device_info) ||
5885 IS_BXT(i965->intel.device_info))
5886 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5887 else if (IS_KBL(i965->intel.device_info) ||
5888 IS_GLK(i965->intel.device_info))
5889 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5894 PAK pipeline related functions
5897 intel_avc_enc_slice_type_fixup(int slice_type);
/* Emits MFX_PIPE_MODE_SELECT (5 dwords) configuring the MFX engine for
 * AVC encode: long format, VLD mode, non-VDEnc, stream-out enabled on
 * every PAK pass except the last, and (pre/post) deblocking outputs
 * enabled according to which output bo is allocated. */
5900 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
5901 struct encode_state *encode_state,
5902 struct intel_encoder_context *encoder_context)
5904 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5905 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
5906 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
5907 struct intel_batchbuffer *batch = encoder_context->base.batch;
5909 BEGIN_BCS_BATCH(batch, 5);
5911 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
5912 OUT_BCS_BATCH(batch,
5914 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
5915 (MFD_MODE_VLD << 15) |
5916 (0 << 13) | /* Non-VDEnc mode is 0*/
5917 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
5918 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
5919 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
5920 (0 << 7) | /* Scaled surface enable */
5921 (0 << 6) | /* Frame statistics stream out enable */
5922 (0 << 5) | /* not in stitch mode */
5923 (1 << 4) | /* encoding mode */
5924 (MFX_FORMAT_AVC << 0));
5925 OUT_BCS_BATCH(batch,
5926 (0 << 7) | /* expand NOA bus flag */
5927 (0 << 6) | /* disable slice-level clock gating */
5928 (0 << 5) | /* disable clock gating for NOA */
5929 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
5930 (0 << 3) | /* terminate if AVC mbdata error occurs */
5931 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
5934 OUT_BCS_BATCH(batch, 0);
5935 OUT_BCS_BATCH(batch, 0);
5937 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_SURFACE_STATE (6 dwords) describing an NV12 (planar 4:2:0,
 * interleaved U/V), Y-tiled surface with the given surface id. */
5941 gen9_mfc_avc_surface_state(VADriverContextP ctx,
5942 struct intel_encoder_context *encoder_context,
5943 struct i965_gpe_resource *gpe_resource,
5946 struct intel_batchbuffer *batch = encoder_context->base.batch;
5948 BEGIN_BCS_BATCH(batch, 6);
5950 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
5951 OUT_BCS_BATCH(batch, id);
5952 OUT_BCS_BATCH(batch,
5953 ((gpe_resource->height - 1) << 18) |
5954 ((gpe_resource->width - 1) << 4));
5955 OUT_BCS_BATCH(batch,
5956 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
5957 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
5958 ((gpe_resource->pitch - 1) << 3) | /* pitch */
5959 (0 << 2) | /* must be 0 for interleave U/V */
5960 (1 << 1) | /* must be tiled */
5961 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
5962 OUT_BCS_BATCH(batch,
5963 (0 << 16) | /* must be 0 for interleave U/V */
5964 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
/* NOTE(review): this dword normally carries the V(cr) Y-offset; the same
 * y_cb_offset is written here, which is fine for interleaved NV12 but the
 * "U(cb)" comment is misleading — confirm against the MFX PRM. */
5965 OUT_BCS_BATCH(batch,
5966 (0 << 16) | /* must be 0 for interleave U/V */
5967 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
5969 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_PIPE_BUF_ADDR_STATE (65 dwords): pre/post-deblocking outputs,
 * uncompressed input, PAK MB-status (write then read), row-store scratch
 * buffers, the 16 reference-picture base addresses, and the unused ILDB
 * slots. */
5973 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5975 struct i965_driver_data *i965 = i965_driver_data(ctx);
5976 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5977 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
5978 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
5979 struct intel_batchbuffer *batch = encoder_context->base.batch;
5982 BEGIN_BCS_BATCH(batch, 65);
5984 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
5986 /* the DW1-3 is for pre_deblocking */
5987 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
5989 /* the DW4-6 is for the post_deblocking */
5990 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
5992 /* the DW7-9 is for the uncompressed_picture */
5993 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
5995 /* the DW10-12 is for PAK information (write) */
5996 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
5998 /* the DW13-15 is for the intra_row_store_scratch */
5999 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6001 /* the DW16-18 is for the deblocking filter */
6002 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6004 /* the DW 19-50 is for Reference pictures*/
6005 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
6006 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
6009 /* DW 51, reference picture attributes */
6010 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
6012 /* The DW 52-54 is for PAK information (read) */
6013 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
6015 /* the DW 55-57 is the ILDB buffer */
6016 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6018 /* the DW 58-60 is the second ILDB buffer */
6019 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6021 /* DW 61, memory compress enable & mode */
6022 OUT_BCS_BATCH(batch, 0);
6024 /* the DW 62-64 is the buffer */
6025 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6027 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): the per-frame MV data
 * buffer (sized 32*4 bytes per MB) and the PAK-BSE output range inside
 * the compressed-bitstream buffer. Bitstream/IT-COFF/DBLK indirect
 * objects are unused on the encoder path. */
6031 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
6032 struct encode_state *encode_state,
6033 struct intel_encoder_context *encoder_context)
6035 struct i965_driver_data *i965 = i965_driver_data(ctx);
6036 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6037 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6038 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6039 struct intel_batchbuffer *batch = encoder_context->base.batch;
6040 struct object_surface *obj_surface;
6041 struct gen9_surface_avc *avc_priv_surface;
6042 unsigned int size = 0;
6043 unsigned int w_mb = generic_state->frame_width_in_mbs;
6044 unsigned int h_mb = generic_state->frame_height_in_mbs;
6046 obj_surface = encode_state->reconstructed_object;
/* Bail out if the reconstructed surface has no AVC private data.
 * NOTE(review): the statement body of this guard is on an elided line. */
6048 if (!obj_surface || !obj_surface->private_data)
6050 avc_priv_surface = obj_surface->private_data;
6052 BEGIN_BCS_BATCH(batch, 26);
6054 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
6055 /* The DW1-5 is for the MFX indirect bistream offset */
6056 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6057 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6059 /* the DW6-10 is for MFX Indirect MV Object Base Address */
6060 size = w_mb * h_mb * 32 * 4;
6061 OUT_BUFFER_3DW(batch,
6062 avc_priv_surface->res_mv_data_surface.bo,
6065 i965->intel.mocs_state);
6066 OUT_BUFFER_2DW(batch,
6067 avc_priv_surface->res_mv_data_surface.bo,
6069 ALIGN(size, 0x1000));
6071 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
6072 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6073 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6075 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
6076 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6077 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6079 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
6080 * Note: an offset is specified in MFX_AVC_SLICE_STATE
6082 OUT_BUFFER_3DW(batch,
6083 generic_ctx->compressed_bitstream.res.bo,
6086 i965->intel.mocs_state);
6087 OUT_BUFFER_2DW(batch,
6088 generic_ctx->compressed_bitstream.res.bo,
6090 generic_ctx->compressed_bitstream.end_offset);
6092 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): only the BSD/MPC
 * row-store scratch buffer is needed for encode; the MPR row-store and
 * bitplane-read slots are left null. */
6096 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
6098 struct i965_driver_data *i965 = i965_driver_data(ctx);
6099 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6100 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6101 struct intel_batchbuffer *batch = encoder_context->base.batch;
6103 BEGIN_BCS_BATCH(batch, 10);
6105 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
6107 /* The DW1-3 is for bsd/mpc row store scratch buffer */
6108 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6110 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
6111 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6113 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
6114 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6116 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses
 * for the reference frames and the current frame, followed by the POC
 * table used for temporal direct mode. */
6120 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
6121 struct intel_encoder_context *encoder_context)
6123 struct i965_driver_data *i965 = i965_driver_data(ctx);
6124 struct intel_batchbuffer *batch = encoder_context->base.batch;
6125 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6126 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6127 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6131 BEGIN_BCS_BATCH(batch, 71);
6133 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
6135 /* Reference frames and Current frames */
6136 /* the DW1-32 is for the direct MV for reference */
/* Each reference slot takes two dwords (64-bit address); unallocated
 * slots are padded with zeros so the command length stays fixed. */
6137 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
6138 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
6139 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
6140 I915_GEM_DOMAIN_INSTRUCTION, 0,
6143 OUT_BCS_BATCH(batch, 0);
6144 OUT_BCS_BATCH(batch, 0);
/* DW33: MOCS for the reference MV buffers. */
6148 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
6150 /* the DW34-36 is the MV for the current frame */
6151 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
6152 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
6155 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: 32 entries for the references, then the two entries for the
 * current frame's top/bottom fields. */
6158 for (i = 0; i < 32; i++) {
6159 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
6161 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
6162 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
6164 ADVANCE_BCS_BATCH(batch);
/* Emits one MFX_QM_STATE command (18 dwords) for the given matrix type,
 * zero-padding the quantizer matrix to 16 dwords. qm_length is in dwords
 * and must be <= 16. */
6168 gen9_mfc_qm_state(VADriverContextP ctx,
6170 const unsigned int *qm,
6172 struct intel_encoder_context *encoder_context)
6174 struct intel_batchbuffer *batch = encoder_context->base.batch;
6175 unsigned int qm_buffer[16];
6177 assert(qm_length <= 16);
6178 assert(sizeof(*qm) == 4);
6179 memset(qm_buffer, 0, 16 * 4);
6180 memcpy(qm_buffer, qm, qm_length * 4);
6182 BEGIN_BCS_BATCH(batch, 18);
6183 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6184 OUT_BCS_BATCH(batch, qm_type << 0);
6185 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6186 ADVANCE_BCS_BATCH(batch);
/* Programs the four AVC quantizer matrices (4x4/8x8, intra/inter).
 * Uses flat matrices unless the SPS or PPS signals scaling lists, in
 * which case they are taken from the VAIQMatrixBufferH264. */
6190 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6191 struct encode_state *encode_state,
6192 struct intel_encoder_context *encoder_context)
6194 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6195 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6196 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
6197 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6200 const unsigned int *qm_4x4_intra;
6201 const unsigned int *qm_4x4_inter;
6202 const unsigned int *qm_8x8_intra;
6203 const unsigned int *qm_8x8_inter;
6205 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6206 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6207 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6209 VAIQMatrixBufferH264 *qm;
6210 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6211 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* List 0 = intra Y, list 3 = inter Y for 4x4; lists 0/1 for 8x8. */
6212 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6213 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6214 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6215 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6218 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6219 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6220 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6221 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emits one MFX_FQM_STATE command (34 dwords) for the given matrix type,
 * zero-padding the forward-quantizer matrix to 32 dwords. fqm_length is
 * in dwords and must be <= 32. */
6225 gen9_mfc_fqm_state(VADriverContextP ctx,
6227 const unsigned int *fqm,
6229 struct intel_encoder_context *encoder_context)
6231 struct intel_batchbuffer *batch = encoder_context->base.batch;
6232 unsigned int fqm_buffer[32];
6234 assert(fqm_length <= 32);
6235 assert(sizeof(*fqm) == 4);
6236 memset(fqm_buffer, 0, 32 * 4);
6237 memcpy(fqm_buffer, fqm, fqm_length * 4);
6239 BEGIN_BCS_BATCH(batch, 34);
6240 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6241 OUT_BCS_BATCH(batch, fqm_type << 0);
6242 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6243 ADVANCE_BCS_BATCH(batch);
/* Builds a forward-quantizer matrix: transposes the len x len scaling
 * list and stores 65536/qm per entry. The assert guards the division.
 * NOTE(review): when qm[..] == 1 the quotient is 65536, which truncates
 * to 0 in a uint16_t — confirm scaling-list values of 1 are out of scope
 * or handled by the hardware. */
6247 gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
6250 for (i = 0; i < len; i++)
6251 for (j = 0; j < len; j++) {
6252 assert(qm[j * len + i]);
6253 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/* Programs the four AVC forward-quantizer matrices, either flat or
 * derived from the application's scaling lists via gen9_mfc_fill_fqm.
 * NOTE(review): the local `fqm` buffer declaration is on an elided line. */
6258 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6259 struct encode_state *encode_state,
6260 struct intel_encoder_context *encoder_context)
6262 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6263 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6264 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
6265 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6267 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6268 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6269 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6270 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6271 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6272 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6276 VAIQMatrixBufferH264 *qm;
6277 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6278 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* 4x4: three intra lists (0-2), then three inter lists (3-5), each
 * packed as 16 uint16_t entries. */
6280 for (i = 0; i < 3; i++)
6281 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6282 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6284 for (i = 3; i < 6; i++)
6285 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6286 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6288 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6289 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6291 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6292 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emits one MFX_INSERT_OBJECT command carrying packed header/raw data.
 * lenght_in_dws [sic — misspelling kept, renaming is a code change] is
 * the payload size in dwords; data_bits_in_last_dw==0 means a full last
 * dword. Flags select emulation-byte insertion, last-header and
 * end-of-slice markers, and the slice-header indicator for PAK. */
6297 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6298 struct intel_encoder_context *encoder_context,
6299 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6300 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6301 int slice_header_indicator,
6302 struct intel_batchbuffer *batch)
6304 if (data_bits_in_last_dw == 0)
6305 data_bits_in_last_dw = 32;
6307 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
/* Command length field = total dwords - 2 = (lenght_in_dws + 2) - 2. */
6309 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6310 OUT_BCS_BATCH(batch,
6311 (0 << 16) | /* always start at offset 0 */
6312 (slice_header_indicator << 14) |
6313 (data_bits_in_last_dw << 8) |
6314 (skip_emul_byte_count << 4) |
6315 (!!emulation_flag << 3) |
6316 ((!!is_last_header) << 2) |
6317 ((!!is_end_of_slice) << 1) |
6318 (0 << 0)); /* check this flag */
6319 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6321 ADVANCE_BCS_BATCH(batch);
/* Scans the packed raw data attached to slice 0 and inserts only the
 * Access Unit Delimiter NAL (it must precede all other headers). */
6325 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
6326 struct encode_state *encode_state,
6327 struct intel_encoder_context *encoder_context,
6328 struct intel_batchbuffer *batch)
6330 VAEncPackedHeaderParameterBuffer *param = NULL;
6331 unsigned int length_in_bits;
6332 unsigned int *header_data = NULL;
6333 unsigned char *nal_type = NULL;
6334 int count, i, start_index;
6336 count = encode_state->slice_rawdata_count[0];
6337 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
6339 for (i = 0; i < count; i++) {
6340 unsigned int skip_emul_byte_cnt;
6342 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
6343 nal_type = (unsigned char *)header_data;
6345 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
6347 length_in_bits = param->bit_length;
6349 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* The NAL type byte sits just before the payload, i.e. at the end of
 * the start-code prefix skipped by skip_emul_byte_cnt. */
6351 if ((*(nal_type+skip_emul_byte_cnt-1)&0x1f) == AVC_NAL_DELIMITER ) {
6352 gen9_mfc_avc_insert_object(ctx,
6355 ALIGN(length_in_bits, 32) >> 5,
6356 length_in_bits & 0x1f,
6360 !param->has_emulation_bytes,
/* Inserts a slice's packed raw data (skipping AUD and the slice header
 * itself), then inserts the slice header last: either the app-supplied
 * packed slice header, or one the driver builds via
 * build_avc_slice_header when none was provided. */
6369 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
6370 struct encode_state *encode_state,
6371 struct intel_encoder_context *encoder_context,
6373 struct intel_batchbuffer *batch)
6375 VAEncPackedHeaderParameterBuffer *param = NULL;
6376 unsigned int length_in_bits;
6377 unsigned int *header_data = NULL;
6378 int count, i, start_index;
6379 int slice_header_index;
6380 unsigned char *nal_type = NULL;
/* slice_header_index == -1 marks "no packed slice header supplied". */
6382 if (encode_state->slice_header_index[slice_index] == 0)
6383 slice_header_index = -1;
6385 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
6387 count = encode_state->slice_rawdata_count[slice_index];
6388 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
6390 for (i = 0; i < count; i++) {
6391 unsigned int skip_emul_byte_cnt;
6393 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
6394 nal_type = (unsigned char *)header_data;
6396 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
6398 length_in_bits = param->bit_length;
6400 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6402 /* skip the slice header packed data type as it is lastly inserted */
6403 if (param->type == VAEncPackedHeaderSlice || (*(nal_type+skip_emul_byte_cnt-1)&0x1f) == AVC_NAL_DELIMITER)
6406 /* as the slice header is still required, the last header flag is set to
6409 gen9_mfc_avc_insert_object(ctx,
6412 ALIGN(length_in_bits, 32) >> 5,
6413 length_in_bits & 0x1f,
6417 !param->has_emulation_bytes,
6422 if (slice_header_index == -1) {
6423 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
6424 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6425 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
6426 unsigned char *slice_header = NULL;
6427 int slice_header_length_in_bits = 0;
6429 /* No slice header data is passed. And the driver needs to generate it */
6430 /* For the Normal H264 */
6431 slice_header_length_in_bits = build_avc_slice_header(seq_param,
/* NOTE(review): build_avc_slice_header allocates slice_header; the
 * matching free() should be on an elided line — verify. */
6435 gen9_mfc_avc_insert_object(ctx,
6437 (unsigned int *)slice_header,
6438 ALIGN(slice_header_length_in_bits, 32) >> 5,
6439 slice_header_length_in_bits & 0x1f,
6440 5, /* first 5 bytes are start code + nal unit type */
6447 unsigned int skip_emul_byte_cnt;
6449 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
6451 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
6452 length_in_bits = param->bit_length;
6454 /* as the slice header is the last header data for one slice,
6455 * the last header flag is set to one.
6457 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6459 gen9_mfc_avc_insert_object(ctx,
6462 ALIGN(length_in_bits, 32) >> 5,
6463 length_in_bits & 0x1f,
6467 !param->has_emulation_bytes,
/* Inserts all headers for a slice. For slice 0: AUD first, then the
 * packed SPS, PPS and SEI buffers if present; finally the per-slice
 * packed data and slice header.
 * NOTE(review): "inset" in the name is a long-standing typo for "insert";
 * renaming would break callers, so it is only flagged here. */
6476 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6477 struct encode_state *encode_state,
6478 struct intel_encoder_context *encoder_context,
6479 VAEncSliceParameterBufferH264 *slice_param,
6481 struct intel_batchbuffer *batch)
6483 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6484 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6485 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6486 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6487 unsigned int skip_emul_byte_cnt;
6489 if (slice_index == 0) {
6491 /* if AUD exist and insert it firstly */
6492 gen9_mfc_avc_insert_aud_packed_data(ctx,encode_state,encoder_context,batch);
/* SPS */
6494 if (encode_state->packed_header_data[idx]) {
6495 VAEncPackedHeaderParameterBuffer *param = NULL;
6496 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6497 unsigned int length_in_bits;
6499 assert(encode_state->packed_header_param[idx]);
6500 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6501 length_in_bits = param->bit_length;
6503 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6504 gen9_mfc_avc_insert_object(ctx,
6507 ALIGN(length_in_bits, 32) >> 5,
6508 length_in_bits & 0x1f,
6512 !param->has_emulation_bytes,
/* PPS */
6517 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6519 if (encode_state->packed_header_data[idx]) {
6520 VAEncPackedHeaderParameterBuffer *param = NULL;
6521 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6522 unsigned int length_in_bits;
6524 assert(encode_state->packed_header_param[idx]);
6525 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6526 length_in_bits = param->bit_length;
6528 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6530 gen9_mfc_avc_insert_object(ctx,
6533 ALIGN(length_in_bits, 32) >> 5,
6534 length_in_bits & 0x1f,
6538 !param->has_emulation_bytes,
/* SEI (or, for CBR, the branch below on an elided body). */
6543 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6545 if (encode_state->packed_header_data[idx]) {
6546 VAEncPackedHeaderParameterBuffer *param = NULL;
6547 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6548 unsigned int length_in_bits;
6550 assert(encode_state->packed_header_param[idx]);
6551 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6552 length_in_bits = param->bit_length;
6554 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6555 gen9_mfc_avc_insert_object(ctx,
6558 ALIGN(length_in_bits, 32) >> 5,
6559 length_in_bits & 0x1f,
6563 !param->has_emulation_bytes,
6566 } else if (internal_rate_mode == VA_RC_CBR) {
/* Per-slice raw data + slice header (arguments on elided lines). */
6571 gen9_mfc_avc_insert_slice_packed_data(ctx,
/*
 * Program the MFX_AVC_SLICE_STATE command (11 DWs) for one slice.
 *
 * Derives per-slice fields from the VA picture/slice parameters:
 *  - slice start/end MB coordinates (next_slice_* falls back to the frame
 *    bottom when there is no following slice),
 *  - weighted-prediction denominators and active reference counts per
 *    slice type (I/P/B), honoring num_ref_idx_active_override_flag,
 *  - implicit weighted bipred (weighted_pred_idc == 2) forces the
 *    log2 weight denominators to 5 per H.264 spec 8.4.3 (8-279),
 *  - BRC-related controls: the rate-control counter is only enabled on
 *    re-encode PAK passes, and RC panic only on the last pass when
 *    min/max-QP clamping is off and the mode is not CQP,
 *  - inter rounding controls from avc_state for P/B slices.
 *
 * NOTE(review): slice_qp is computed from pic_init_qp + slice_qp_delta;
 * max_qp_n/max_qp_p and the correct[]/grow/shrink values are set by code
 * not visible here — presumably the QP clamp window and PAK correction
 * thresholds; confirm against the full function body.
 */
6579 gen9_mfc_avc_slice_state(VADriverContextP ctx,
6580 struct encode_state *encode_state,
6581 struct intel_encoder_context *encoder_context,
6582 VAEncPictureParameterBufferH264 *pic_param,
6583 VAEncSliceParameterBufferH264 *slice_param,
6584 VAEncSliceParameterBufferH264 *next_slice_param,
6585 struct intel_batchbuffer *batch)
6587 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6588 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6589 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6590 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6591 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
6592 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
6593 unsigned char correct[6], grow, shrink;
6594 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
6595 int max_qp_n, max_qp_p;
6597 int weighted_pred_idc = 0;
6598 int num_ref_l0 = 0, num_ref_l1 = 0;
6599 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6600 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6601 unsigned int rc_panic_enable = 0;
6602 unsigned int rate_control_counter_enable = 0;
6603 unsigned int rounding_value = 0;
6604 unsigned int rounding_inter_enable = 0;
/* Convert the slice's first MB address into (x, y) MB coordinates. */
6606 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6607 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
6609 if (next_slice_param) {
6610 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6611 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice: the "next slice" position is the bottom of the frame. */
6613 next_slice_hor_pos = 0;
6614 next_slice_ver_pos = generic_state->frame_height_in_mbs;
6617 if (slice_type == SLICE_TYPE_I) {
6618 luma_log2_weight_denom = 0;
6619 chroma_log2_weight_denom = 0;
6620 } else if (slice_type == SLICE_TYPE_P) {
6621 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
6622 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6623 rounding_inter_enable = avc_state->rounding_inter_enable;
6624 rounding_value = avc_state->rounding_value;
6626 if (slice_param->num_ref_idx_active_override_flag)
6627 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6628 } else if (slice_type == SLICE_TYPE_B) {
6629 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
6630 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6631 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
6632 rounding_inter_enable = avc_state->rounding_inter_enable;
6633 rounding_value = avc_state->rounding_value;
6635 if (slice_param->num_ref_idx_active_override_flag) {
6636 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6637 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
6640 if (weighted_pred_idc == 2) {
6641 /* 8.4.3 - Derivation process for prediction weights (8-279) */
6642 luma_log2_weight_denom = 5;
6643 chroma_log2_weight_denom = 5;
/* HW rate-control counters are only meaningful on re-encode passes. */
6652 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
6653 rc_panic_enable = (avc_state->rc_panic_enable &&
6654 (!avc_state->min_max_qp_enable) &&
6655 (encoder_context->rate_control_mode != VA_RC_CQP) &&
6656 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
6658 for (i = 0; i < 6; i++)
6661 BEGIN_BCS_BATCH(batch, 11);
6663 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
6664 OUT_BCS_BATCH(batch, slice_type);
6665 OUT_BCS_BATCH(batch,
6666 (num_ref_l1 << 24) |
6667 (num_ref_l0 << 16) |
6668 (chroma_log2_weight_denom << 8) |
6669 (luma_log2_weight_denom << 0));
6670 OUT_BCS_BATCH(batch,
6671 (weighted_pred_idc << 30) |
6672 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
6673 (slice_param->disable_deblocking_filter_idc << 27) |
6674 (slice_param->cabac_init_idc << 24) |
6676 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
6677 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
6679 OUT_BCS_BATCH(batch,
6680 slice_ver_pos << 24 |
6681 slice_hor_pos << 16 |
6682 slice_param->macroblock_address);
6683 OUT_BCS_BATCH(batch,
6684 next_slice_ver_pos << 16 |
6685 next_slice_hor_pos);
6687 OUT_BCS_BATCH(batch,
6688 (rate_control_counter_enable << 31) |
6689 (1 << 30) | /* ResetRateControlCounter */
6690 (2 << 28) | /* Loose Rate Control */
6691 (0 << 24) | /* RC Stable Tolerance */
6692 (rc_panic_enable << 23) | /* RC Panic Enable */
6693 (1 << 22) | /* CBP mode */
6694 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
6695 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
6696 (!next_slice_param << 19) | /* Is Last Slice */
6697 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
6698 (1 << 17) | /* HeaderPresentFlag */
6699 (1 << 16) | /* SliceData PresentFlag */
6700 (0 << 15) | /* TailPresentFlag */
6701 (1 << 13) | /* RBSP NAL TYPE */
6702 (1 << 12)); /* CabacZeroWordInsertionEnable */
6704 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
6706 OUT_BCS_BATCH(batch,
6707 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
6708 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
6711 OUT_BCS_BATCH(batch,
6712 (rounding_inter_enable << 31) |
6713 (rounding_value << 28) |
6716 (correct[5] << 20) |
6717 (correct[4] << 16) |
6718 (correct[3] << 12) |
6722 OUT_BCS_BATCH(batch, 0);
6724 ADVANCE_BCS_BATCH(batch);
/*
 * Build one byte of the MFX_AVC_REF_IDX_STATE reference-list entry for a
 * single reference picture:
 *   bit 6      : long-term reference flag
 *   bits 5..1  : (partially visible) field/frame info and frame store id
 *   bit 0      : bottom-field flag (set only when bottom and not top)
 * Flags are normalized to 0/1 with the !! idiom before packing.
 */
6728 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6730 unsigned int is_long_term =
6731 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6732 unsigned int is_top_field =
6733 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6734 unsigned int is_bottom_field =
6735 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6737 return ((is_long_term << 6) |
6739 (frame_store_id << 1) |
6740 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit MFX_AVC_REF_IDX_STATE for the slice's reference lists.
 *
 * L0 is emitted for P and B slices, L1 additionally for B slices.  Each
 * list entry word packs up to 4 references (one byte each, built by
 * gen9_mfc_avc_get_ref_idx_state()); 0x80808080 marks unused/invalid
 * entries, and the 7 trailing DWs of each 10-DW command are filled with
 * that invalid pattern.
 *
 * NOTE(review): `0xFF << ref_idx_shift` with ref_idx_shift == 24 left-
 * shifts a signed int into the sign bit (undefined behavior in C);
 * 0xFFu would be the safe spelling — confirm before changing, since the
 * resulting bit pattern is the same on this target in practice.
 */
6744 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6745 struct encode_state *encode_state,
6746 struct intel_encoder_context *encoder_context,
6747 VAEncSliceParameterBufferH264 *slice_param,
6748 struct intel_batchbuffer *batch)
6750 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6751 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6752 VAPictureH264 *ref_pic;
6753 int i, slice_type, ref_idx_shift;
6754 unsigned int fwd_ref_entry;
6755 unsigned int bwd_ref_entry;
6757 /* max 4 ref frames are allowed for l0 and l1 */
6758 fwd_ref_entry = 0x80808080;
6759 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6761 if ((slice_type == SLICE_TYPE_P) ||
6762 (slice_type == SLICE_TYPE_B)) {
6763 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6764 ref_pic = &slice_param->RefPicList0[i];
6765 ref_idx_shift = i * 8;
/* clear the byte slot, then drop in this reference's descriptor */
6767 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6768 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6772 bwd_ref_entry = 0x80808080;
6773 if (slice_type == SLICE_TYPE_B) {
6774 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6775 ref_pic = &slice_param->RefPicList1[i];
6776 ref_idx_shift = i * 8;
6778 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6779 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
6783 if ((slice_type == SLICE_TYPE_P) ||
6784 (slice_type == SLICE_TYPE_B)) {
6785 BEGIN_BCS_BATCH(batch, 10);
6786 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6787 OUT_BCS_BATCH(batch, 0); // L0
6788 OUT_BCS_BATCH(batch, fwd_ref_entry);
6790 for (i = 0; i < 7; i++) {
6791 OUT_BCS_BATCH(batch, 0x80808080);
6794 ADVANCE_BCS_BATCH(batch);
6797 if (slice_type == SLICE_TYPE_B) {
6798 BEGIN_BCS_BATCH(batch, 10);
6799 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6800 OUT_BCS_BATCH(batch, 1); //Select L1
6801 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
6802 for (i = 0; i < 7; i++) {
6803 OUT_BCS_BATCH(batch, 0x80808080);
6805 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 DWs) when explicit weighted
 * prediction is in use.
 *
 * Each of the 32 possible reference entries gets 6 shorts:
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}.
 * P slices with weighted_pred_flag == 1 emit one table for L0; B slices
 * with weighted_bipred_idc == 1 (explicit) emit one table for L0 and one
 * for L1 (second DW after the header selects the list: 0 = L0, 1 = L1).
 * Implicit bipred (idc == 2) needs no table — the HW derives weights.
 */
6810 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6811 struct encode_state *encode_state,
6812 struct intel_encoder_context *encoder_context,
6813 VAEncPictureParameterBufferH264 *pic_param,
6814 VAEncSliceParameterBufferH264 *slice_param,
6815 struct intel_batchbuffer *batch)
6818 short weightoffsets[32 * 6];
6820 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6822 if (slice_type == SLICE_TYPE_P &&
6823 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6824 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6825 for (i = 0; i < 32; i++) {
6826 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6827 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6828 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6829 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6830 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6831 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6834 BEGIN_BCS_BATCH(batch, 98);
6835 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6836 OUT_BCS_BATCH(batch, 0);
6837 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6839 ADVANCE_BCS_BATCH(batch);
/* B slice, explicit weighted bipred: L0 table followed by L1 table. */
6842 if (slice_type == SLICE_TYPE_B &&
6843 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6844 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6845 for (i = 0; i < 32; i++) {
6846 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6847 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6848 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6849 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6850 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6851 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6854 BEGIN_BCS_BATCH(batch, 98);
6855 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6856 OUT_BCS_BATCH(batch, 0);
6857 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6858 ADVANCE_BCS_BATCH(batch);
6860 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6861 for (i = 0; i < 32; i++) {
6862 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6863 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6864 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6865 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6866 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6867 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6870 BEGIN_BCS_BATCH(batch, 98);
6871 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6872 OUT_BCS_BATCH(batch, 1);
6873 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6874 ADVANCE_BCS_BATCH(batch);
/*
 * Encode one slice: build (on PAK pass 0) the per-slice second-level
 * batch — REF_IDX_STATE, WEIGHTOFFSET_STATE, SLICE_STATE, packed
 * headers — terminated with MI_BATCH_BUFFER_END, then chain into the
 * main batch:
 *   1. the slice command batch (at slice_batch_offset[slice_index],
 *      recorded on pass 0 and reused on re-encode passes), and
 *   2. the per-MB PAK object commands produced by the VME/MBENC kernels,
 *      read from the reconstructed surface's mb-code buffer at
 *      macroblock_address * 16 DWs (16 DWs of MB code per macroblock).
 * Both are started with is_second_level = 1 so the HW returns to the
 * primary batch afterwards.
 */
6879 gen9_mfc_avc_single_slice(VADriverContextP ctx,
6880 struct encode_state *encode_state,
6881 struct intel_encoder_context *encoder_context,
6882 VAEncSliceParameterBufferH264 *slice_param,
6883 VAEncSliceParameterBufferH264 *next_slice_param,
6886 struct i965_driver_data *i965 = i965_driver_data(ctx);
6887 struct i965_gpe_table *gpe = &i965->gpe_table;
6888 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6889 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6890 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6891 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6892 struct intel_batchbuffer *batch = encoder_context->base.batch;
6893 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
6894 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6895 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
6896 struct object_surface *obj_surface;
6897 struct gen9_surface_avc *avc_priv_surface;
6899 unsigned int slice_offset = 0;
/* Only build the slice commands once; later passes replay them. */
6901 if (generic_state->curr_pak_pass == 0) {
6902 slice_offset = intel_batchbuffer_used_size(slice_batch);
6903 avc_state->slice_batch_offset[slice_index] = slice_offset;
6904 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
6905 gen9_mfc_avc_weightoffset_state(ctx,
6911 gen9_mfc_avc_slice_state(ctx,
6918 gen9_mfc_avc_inset_headers(ctx,
/* terminate this slice's second-level batch (padded to QW) */
6925 BEGIN_BCS_BATCH(slice_batch, 2);
6926 OUT_BCS_BATCH(slice_batch, 0);
6927 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
6928 ADVANCE_BCS_BATCH(slice_batch);
6931 slice_offset = avc_state->slice_batch_offset[slice_index];
6933 /* insert slice as second levle.*/
6934 memset(&second_level_batch, 0, sizeof(second_level_batch));
6935 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
6936 second_level_batch.offset = slice_offset;
6937 second_level_batch.bo = slice_batch->buffer;
6938 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
6940 /* insert mb code as second levle.*/
6941 obj_surface = encode_state->reconstructed_object;
6942 assert(obj_surface->private_data);
6943 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
6945 memset(&second_level_batch, 0, sizeof(second_level_batch));
6946 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
6947 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
6948 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
6949 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * Slice-level PAK: iterate over every slice parameter buffer (outer loop
 * over slice groups, inner loop over elements within a group), resolving
 * each slice's successor (NULL for the frame's last slice) and encoding
 * it via gen9_mfc_avc_single_slice().  Ends with an MI_FLUSH_DW that
 * invalidates the video pipeline cache.
 * NOTE(review): is_frame_level / has_tail are placeholders per the
 * original comments ("check it for SKL", "check it later").
 */
6954 gen9_avc_pak_slice_level(VADriverContextP ctx,
6955 struct encode_state *encode_state,
6956 struct intel_encoder_context *encoder_context)
6958 struct i965_driver_data *i965 = i965_driver_data(ctx);
6959 struct i965_gpe_table *gpe = &i965->gpe_table;
6960 struct intel_batchbuffer *batch = encoder_context->base.batch;
6961 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
6962 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
6964 int slice_index = 0;
6965 int is_frame_level = 1; /* check it for SKL,now single slice per frame */
6966 int has_tail = 0; /* check it later */
6968 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
6969 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
6971 if (j == encode_state->num_slice_params_ext - 1)
6972 next_slice_group_param = NULL;
6974 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
6976 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
6977 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
6978 next_slice_param = slice_param + 1;
6980 next_slice_param = next_slice_group_param;
6982 gen9_mfc_avc_single_slice(ctx,
6994 /* remove assert(0) and add other commands here */
7004 /* insert a tail if required */
7007 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
7008 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
7009 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/*
 * Picture-level PAK setup for the current pass.
 *
 * On BRC re-encode passes (curr_pak_pass != 0) an MI_CONDITIONAL_BATCH_
 * BUFFER_END first checks the image status mask written by the previous
 * pass; if the compare passes, the rest of this pass is skipped by HW.
 * Then the MFX pipeline state is programmed: pipe mode select, surface
 * states for reconstructed (id 0) and raw input (id 4) surfaces,
 * pipe/indirect-object/BSP buffer base addresses, the image state
 * (from the BRC-written image-state buffer at a per-pass offset when BRC
 * is on, or freshly generated for non-BRC), and finally QM/FQM matrices
 * and the direct-mode (DMV) state.
 */
7012 gen9_avc_pak_picture_level(VADriverContextP ctx,
7013 struct encode_state *encode_state,
7014 struct intel_encoder_context *encoder_context)
7016 struct i965_driver_data *i965 = i965_driver_data(ctx);
7017 struct i965_gpe_table *gpe = &i965->gpe_table;
7018 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7019 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7020 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7021 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7022 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
7023 struct intel_batchbuffer *batch = encoder_context->base.batch;
7025 if (generic_state->brc_enabled &&
7026 generic_state->curr_pak_pass) {
7027 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
7028 struct encoder_status_buffer_internal *status_buffer;
7029 status_buffer = &(avc_ctx->status_buffer);
7031 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
7032 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
7033 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
7034 mi_conditional_batch_buffer_end_params.compare_data = 0;
7035 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
7036 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
7039 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
7040 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
7041 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
7042 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
7043 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
7044 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
/* BRC: replay the image state the BRC kernel wrote for this pass. */
7046 if (generic_state->brc_enabled) {
7047 memset(&second_level_batch, 0, sizeof(second_level_batch));
7048 if (generic_state->curr_pak_pass == 0) {
7049 second_level_batch.offset = 0;
7051 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
7053 second_level_batch.is_second_level = 1;
7054 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
7055 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7057 /*generate a new image state */
7058 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
7059 memset(&second_level_batch, 0, sizeof(second_level_batch));
7060 second_level_batch.offset = 0;
7061 second_level_batch.is_second_level = 1;
7062 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
7063 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7066 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
7067 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
7068 gen9_mfc_avc_directmode_state(ctx, encoder_context);
/*
 * Capture end-of-pass MFX status via MMIO -> memory stores.
 *
 * After an MI_FLUSH_DW barrier, MI_STORE_REGISTER_MEM copies:
 *  - the bitstream byte-count and image-status-mask registers into the
 *    internal status buffer (consumed by the next pass's conditional
 *    batch end and by vaSyncSurface status queries), and
 *  - byte counts (DW0/DW1), the completed pass count (DW2, written as an
 *    immediate), and the per-pass image status control
 *    (DW4 + curr_pak_pass) into the BRC pre-PAK statistics buffer that
 *    the BRC update kernel reads.
 * A final MI_FLUSH_DW orders the stores.
 */
7073 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7075 struct i965_driver_data *i965 = i965_driver_data(ctx);
7076 struct i965_gpe_table *gpe = &i965->gpe_table;
7077 struct intel_batchbuffer *batch = encoder_context->base.batch;
7078 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7079 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7080 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7082 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
7083 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
7084 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
7085 struct encoder_status_buffer_internal *status_buffer;
7087 status_buffer = &(avc_ctx->status_buffer);
7089 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7090 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
7092 /* read register and store into status_buffer and pak_statitistic info */
7093 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
7094 mi_store_reg_mem_param.bo = status_buffer->bo;
7095 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
7096 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7097 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7099 mi_store_reg_mem_param.bo = status_buffer->bo;
7100 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
7101 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
7102 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7104 /*update the status in the pak_statistic_surface */
7105 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7106 mi_store_reg_mem_param.offset = 0;
7107 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7108 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7110 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7111 mi_store_reg_mem_param.offset = 4;
7112 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
7113 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7115 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
7116 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7117 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
7118 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
7119 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
7121 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7122 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
7123 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7124 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7126 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7127 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Map the VA rate-control mode (low 7 bits of rate_control_mode) onto
 * the encoder's internal_rate_mode (CBR / VBR-as-AVBR / CQP default),
 * and apply the default quality level when the app left it at 0.
 * NOTE(review): the switch case labels are not visible in this view —
 * presumably VA_RC_CBR, VA_RC_VBR and a default branch; confirm against
 * the full source.
 */
7133 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7134 struct intel_encoder_context *encoder_context)
7136 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7137 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7138 unsigned int rate_control_mode = encoder_context->rate_control_mode;
7140 switch (rate_control_mode & 0x7f) {
7142 generic_state->internal_rate_mode = VA_RC_CBR;
7146 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7151 generic_state->internal_rate_mode = VA_RC_CQP;
7155 if (encoder_context->quality_level == 0)
7156 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
/*
 * Per-frame PAK preparation: validate slice parameters, bind all surfaces
 * and (re)allocate the scratch buffers the MFX PAK pipeline needs.
 *
 * Steps visible here:
 *  - Scan every slice: assert a legal slice_type and detect whether the
 *    in-loop deblocking filter is enabled anywhere
 *    (disable_deblocking_filter_idc != 1 -> enable_avc_ildb).
 *  - Reconstructed surface: ensure an NV12 BO exists, create/check the
 *    per-surface AVC private data, rebind the current frame's top/bottom
 *    direct-MV buffers into the last two DMV slots
 *    (NUM_MFC_AVC_DMV_BUFFERS - 2 / - 1) and record its POCs, QP,
 *    frame_idx and reference flag.
 *  - Route reconstruction output: post-deblocking surface when ILDB is
 *    on, pre-deblocking surface otherwise.
 *  - Input YUV surface: ensure NV12 and bind as the uncompressed input.
 *  - Reference surfaces: for each slot, release old resources, then bind
 *    the reference BO and its DMV pair (slots 2*i / 2*i+1), set its
 *    frame_store_id and record top/bottom field POCs.
 *  - Recreate the second-level slice batch buffer (sized by the number of
 *    slice parameter buffers) and clear the per-slice offsets.
 *  - Allocate row-store scratch buffers (intra, deblocking w_mb*4*64,
 *    BSD/MPC w_mb*2*64) and the MB status buffer (w_mb*h_mb*16 bytes);
 *    any allocation failure jumps to failed_allocation and returns
 *    VA_STATUS_ERROR_ALLOCATION_FAILED.
 */
7160 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
7161 struct encode_state *encode_state,
7162 struct intel_encoder_context *encoder_context)
7165 struct i965_driver_data *i965 = i965_driver_data(ctx);
7166 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7167 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7168 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7169 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7170 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
7172 struct object_surface *obj_surface;
7173 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
7174 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
7176 struct gen9_surface_avc *avc_priv_surface;
7177 int i, j, enable_avc_ildb = 0;
7178 unsigned int allocate_flag = 1;
7180 unsigned int w_mb = generic_state->frame_width_in_mbs;
7181 unsigned int h_mb = generic_state->frame_height_in_mbs;
7182 struct avc_surface_param surface_param;
7184 /* update the parameter and check slice parameter */
7185 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
7186 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
7187 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7189 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7190 assert((slice_param->slice_type == SLICE_TYPE_I) ||
7191 (slice_param->slice_type == SLICE_TYPE_SI) ||
7192 (slice_param->slice_type == SLICE_TYPE_P) ||
7193 (slice_param->slice_type == SLICE_TYPE_SP) ||
7194 (slice_param->slice_type == SLICE_TYPE_B));
7196 if (slice_param->disable_deblocking_filter_idc != 1) {
7197 enable_avc_ildb = 1;
7204 avc_state->enable_avc_ildb = enable_avc_ildb;
7206 /* setup the all surface and buffer for PAK */
7207 /* Setup current reconstruct frame */
7208 obj_surface = encode_state->reconstructed_object;
7209 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7211 if (va_status != VA_STATUS_SUCCESS)
7214 memset(&surface_param, 0, sizeof(surface_param));
7215 surface_param.frame_width = generic_state->frame_width_in_pixel;
7216 surface_param.frame_height = generic_state->frame_height_in_pixel;
7217 va_status = gen9_avc_init_check_surfaces(ctx,
7218 obj_surface, encoder_context,
7220 if (va_status != VA_STATUS_SUCCESS)
7222 /* init the member of avc_priv_surface,frame_store_id,qp_value */
7224 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7225 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
7226 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
7227 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
7228 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
7229 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
7230 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
7231 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
7232 avc_priv_surface->frame_store_id = 0;
7233 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
7234 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
7235 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
7236 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
7237 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
7239 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7240 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7241 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7242 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
/* ILDB on -> HW writes the post-deblocked picture; off -> pre-deblock. */
7245 if (avc_state->enable_avc_ildb) {
7246 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7248 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7250 /* input YUV surface */
7251 obj_surface = encode_state->input_yuv_object;
7252 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7254 if (va_status != VA_STATUS_SUCCESS)
7256 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7257 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7259 /* Reference surfaces */
7260 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
7261 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7262 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
7263 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
7264 obj_surface = encode_state->reference_objects[i];
7265 avc_state->top_field_poc[2 * i] = 0;
7266 avc_state->top_field_poc[2 * i + 1] = 0;
7268 if (obj_surface && obj_surface->bo) {
7269 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
7271 /* actually it should be handled when it is reconstructed surface */
7272 va_status = gen9_avc_init_check_surfaces(ctx,
7273 obj_surface, encoder_context,
7275 if (va_status != VA_STATUS_SUCCESS)
7277 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7278 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
7279 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
7280 avc_priv_surface->frame_store_id = i;
7281 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
7282 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
/* Rebuild the second-level slice batch sized for this frame's slices. */
7288 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7289 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7290 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7293 avc_ctx->pres_slice_batch_buffer_2nd_level =
7294 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
7296 encode_state->num_slice_params_ext);
7297 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
7298 return VA_STATUS_ERROR_ALLOCATION_FAILED;
7300 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
7301 avc_state->slice_batch_offset[i] = 0;
/* PAK scratch buffers; sizes are in bytes, derived from MB counts. */
7306 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7307 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7308 &avc_ctx->res_intra_row_store_scratch_buffer,
7310 "PAK Intra row store scratch buffer");
7312 goto failed_allocation;
7314 size = w_mb * 4 * 64;
7315 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7316 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7317 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
7319 "PAK Deblocking filter row store scratch buffer");
7321 goto failed_allocation;
7323 size = w_mb * 2 * 64;
7324 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7325 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7326 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
7328 "PAK BSD/MPC row store scratch buffer");
7330 goto failed_allocation;
7332 size = w_mb * h_mb * 16;
7333 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7334 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7335 &avc_ctx->res_pak_mb_status_buffer,
7337 "PAK MB status buffer");
7339 goto failed_allocation;
7341 return VA_STATUS_SUCCESS;
7344 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * PAK entry point for one AVC frame.
 *
 * Prepares the PAK resources, then programs picture-level and
 * slice-level PAK commands once per BRC pass (curr_pak_pass loops up
 * to num_pak_passes), reading the MFC status registers back after
 * each pass.  On success the per-sequence frame counters are advanced.
 * Returns a VAStatus code.
 * NOTE(review): this listing has gaps (the 'else' of the ring
 * selection, the early-return on prepare failure, and loop braces are
 * among the lines not visible here).
 */
7348 gen9_avc_encode_picture(VADriverContextP ctx,
7350 struct encode_state *encode_state,
7351 struct intel_encoder_context *encoder_context)
7354 struct i965_driver_data *i965 = i965_driver_data(ctx);
7355 struct i965_gpe_table *gpe = &i965->gpe_table;
7356 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7357 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7358 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7359 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* Allocate/refresh all PAK buffers for this frame before touching the ring. */
7361 va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);
7363 if (va_status != VA_STATUS_SUCCESS)
/* Submit on BSD ring 0 explicitly when a second BSD ring exists. */
7366 if (i965->intel.has_bsd2)
7367 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
7369 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
7370 intel_batchbuffer_emit_mi_flush(batch);
/* One PAK iteration per BRC pass. */
7372 for (generic_state->curr_pak_pass = 0;
7373 generic_state->curr_pak_pass < generic_state->num_pak_passes;
7374 generic_state->curr_pak_pass++) {
7376 if (generic_state->curr_pak_pass == 0) {
7377 /* First pass only: write 0 to the MFC image status/control register via MI_LOAD_REGISTER_IMM. Whether AVC actually needs this reset is an open question. */
7378 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
7379 struct encoder_status_buffer_internal *status_buffer;
7381 status_buffer = &(avc_ctx->status_buffer);
7382 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
7383 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7384 mi_load_reg_imm.data = 0;
7385 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
7387 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
7388 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
7389 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The 2nd-level slice batch is per-frame; drop it once all passes are done. */
7393 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7394 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7395 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7398 intel_batchbuffer_end_atomic(batch);
7399 intel_batchbuffer_flush(batch);
/* Frame bookkeeping for the next call. */
7401 generic_state->seq_frame_number++;
7402 generic_state->total_frame_number++;
7403 generic_state->first_frame = 0;
7404 return VA_STATUS_SUCCESS;
/*
 * PAK pipeline dispatcher registered as encoder_context->mfc_pipeline.
 * Routes all supported H.264 profiles to gen9_avc_encode_picture();
 * anything else yields VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the switch header, default label, braces and return
 * are in lines not visible in this listing.
 */
7408 gen9_avc_pak_pipeline(VADriverContextP ctx,
7410 struct encode_state *encode_state,
7411 struct intel_encoder_context *encoder_context)
7416 case VAProfileH264ConstrainedBaseline:
7417 case VAProfileH264Main:
7418 case VAProfileH264High:
7419 case VAProfileH264MultiviewHigh:
7420 case VAProfileH264StereoHigh:
7421 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7425 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * Tear down the PAK side of the shared VME/PAK context: releases every
 * GPE resource owned by the generic and AVC encoder contexts (surfaces,
 * bitstream, row-store scratch buffers, MB status buffer, reference and
 * direct-MV buffer arrays) and frees the 2nd-level slice batch buffer.
 * The context structures themselves are not freed here (the VME
 * destroy path owns them).
 */
7433 gen9_avc_pak_context_destroy(void * context)
7435 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7436 struct generic_encoder_context * generic_ctx;
7437 struct i965_avc_encoder_context * avc_ctx;
7443 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7444 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Frame surfaces and bitstream output. */
7447 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7448 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7449 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7450 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7452 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
/* PAK scratch/status buffers allocated in pak_pipeline_prepare. */
7453 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7454 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7455 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7456 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
/* Reference frame list and direct-MV (top/bottom) buffer pairs. */
7458 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7459 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7462 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7463 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
/* Per-frame 2nd-level slice batch, if one is still live. */
7466 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7467 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7468 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * Report the coded size of the last frame to the caller.
 * Reads the encoder_status record stashed in the coded buffer
 * segment's private data and publishes bs_byte_count_frame as the
 * segment size.  Returns VA_STATUS_ERROR_INVALID_BUFFER if either
 * pointer argument is NULL.
 */
7474 gen9_avc_get_coded_status(VADriverContextP ctx,
7475 struct intel_encoder_context *encoder_context,
7476 struct i965_coded_buffer_segment *coded_buf_seg)
7478 struct encoder_status *avc_encode_status;
7480 if (!encoder_context || !coded_buf_seg)
7481 return VA_STATUS_ERROR_INVALID_BUFFER;
7483 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7484 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7486 return VA_STATUS_SUCCESS;
/*
 * Create and initialize the shared VME/PAK encoder context for gen9 AVC.
 *
 * Allocates the five context/state structures, selects the platform
 * kernel binaries (SKL/BXT vs KBL/GLK), seeds every generic and AVC
 * state field with its default value, fills in the status-buffer
 * offset/MMIO bookkeeping, loads the kernels and installs the VME
 * pipeline entry points on encoder_context.
 * NOTE(review): several lines (return type, braces, error-path frees
 * other than free(generic_state)) fall in gaps of this listing.
 */
7490 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7492 /* VME & PAK share the same context */
7493 struct i965_driver_data *i965 = i965_driver_data(ctx);
7494 struct encoder_vme_mfc_context * vme_context = NULL;
7495 struct generic_encoder_context * generic_ctx = NULL;
7496 struct i965_avc_encoder_context * avc_ctx = NULL;
7497 struct generic_enc_codec_state * generic_state = NULL;
7498 struct avc_enc_state * avc_state = NULL;
7499 struct encoder_status_buffer_internal *status_buffer;
7500 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7502 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7503 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7504 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7505 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7506 avc_state = calloc(1, sizeof(struct avc_enc_state));
7508 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7509 goto allocate_structure_failed;
/* NOTE(review): calloc already zero-fills; these memsets are redundant
 * but harmless. */
7511 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7512 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7513 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7514 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7515 memset(avc_state, 0, sizeof(struct avc_enc_state));
/* Wire the sub-contexts into the VME context owned by encoder_context. */
7517 encoder_context->vme_context = vme_context;
7518 vme_context->generic_enc_ctx = generic_ctx;
7519 vme_context->private_enc_ctx = avc_ctx;
7520 vme_context->generic_enc_state = generic_state;
7521 vme_context->private_enc_state = avc_state;
/* Pick the encoder kernel binaries matching the GPU generation;
 * unsupported platforms bail out via the goto below. */
7523 if (IS_SKL(i965->intel.device_info) ||
7524 IS_BXT(i965->intel.device_info)) {
7525 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7526 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7527 } else if (IS_KBL(i965->intel.device_info) ||
7528 IS_GLK(i965->intel.device_info)) {
7529 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7530 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7532 goto allocate_structure_failed;
7534 /* miscellaneous scoreboard defaults */
7536 generic_ctx->use_hw_scoreboard = 1;
7537 generic_ctx->use_hw_non_stalling_scoreboard = 1;
7539 /* initialize generic state */
7541 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7542 generic_state->preset = INTEL_PRESET_RT_SPEED;
7543 generic_state->seq_frame_number = 0;
7544 generic_state->total_frame_number = 0;
7545 generic_state->frame_type = 0;
7546 generic_state->first_frame = 1;
/* Frame/downscaled dimensions are filled in later per sequence. */
7548 generic_state->frame_width_in_pixel = 0;
7549 generic_state->frame_height_in_pixel = 0;
7550 generic_state->frame_width_in_mbs = 0;
7551 generic_state->frame_height_in_mbs = 0;
7552 generic_state->frame_width_4x = 0;
7553 generic_state->frame_height_4x = 0;
7554 generic_state->frame_width_16x = 0;
7555 generic_state->frame_height_16x = 0;
7556 generic_state->frame_width_32x = 0;
7557 generic_state->downscaled_width_4x_in_mb = 0;
7558 generic_state->downscaled_height_4x_in_mb = 0;
7559 generic_state->downscaled_width_16x_in_mb = 0;
7560 generic_state->downscaled_height_16x_in_mb = 0;
7561 generic_state->downscaled_width_32x_in_mb = 0;
7562 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capabilities: 4x and 16x supported, 32x off on gen9. */
7564 generic_state->hme_supported = 1;
7565 generic_state->b16xme_supported = 1;
7566 generic_state->b32xme_supported = 0;
7567 generic_state->hme_enabled = 0;
7568 generic_state->b16xme_enabled = 0;
7569 generic_state->b32xme_enabled = 0;
7570 generic_state->brc_distortion_buffer_supported = 1;
7571 generic_state->brc_constant_buffer_supported = 0;
/* BRC defaults; overridden below when rate control is not NONE/CQP. */
7574 generic_state->frame_rate = 30;
7575 generic_state->brc_allocated = 0;
7576 generic_state->brc_inited = 0;
7577 generic_state->brc_need_reset = 0;
7578 generic_state->is_low_delay = 0;
7579 generic_state->brc_enabled = 0;//default
7580 generic_state->internal_rate_mode = 0;
7581 generic_state->curr_pak_pass = 0;
7582 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7583 generic_state->is_first_pass = 1;
7584 generic_state->is_last_pass = 0;
7585 generic_state->mb_brc_enabled = 0; // MB-level BRC disabled by default
7586 generic_state->brc_roi_enable = 0;
7587 generic_state->brc_dirty_roi_enable = 0;
7588 generic_state->skip_frame_enbale = 0;
/* Rate-control targets; set from sequence/misc parameters at runtime. */
7590 generic_state->target_bit_rate = 0;
7591 generic_state->max_bit_rate = 0;
7592 generic_state->min_bit_rate = 0;
7593 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7594 generic_state->vbv_buffer_size_in_bit = 0;
7595 generic_state->frames_per_100s = 0;
7596 generic_state->gop_size = 0;
7597 generic_state->gop_ref_distance = 0;
7598 generic_state->brc_target_size = 0;
7599 generic_state->brc_mode = 0;
7600 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7601 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7602 generic_state->brc_init_reset_buf_size_in_bits = 0;
7603 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7604 generic_state->frames_per_window_size = 0;//default
7605 generic_state->target_percentage = 0;
7607 generic_state->avbr_curracy = 0;
7608 generic_state->avbr_convergence = 0;
7610 generic_state->num_skip_frames = 0;
7611 generic_state->size_skip_frames = 0;
7613 generic_state->num_roi = 0;
7614 generic_state->max_delta_qp = 0;
7615 generic_state->min_delta_qp = 0;
/* Any real rate-control mode (not NONE/CQP) turns BRC on. */
7617 if (encoder_context->rate_control_mode != VA_RC_NONE &&
7618 encoder_context->rate_control_mode != VA_RC_CQP) {
7619 generic_state->brc_enabled = 1;
7620 generic_state->brc_distortion_buffer_supported = 1;
7621 generic_state->brc_constant_buffer_supported = 1;
7622 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7624 /*avc state initialization */
7625 avc_state->mad_enable = 0;
7626 avc_state->mb_disable_skip_map_enable = 0;
7627 avc_state->sfd_enable = 1;//default
7628 avc_state->sfd_mb_enable = 1;//set it true
7629 avc_state->adaptive_search_window_enable = 1;//default
7630 avc_state->mb_qp_data_enable = 0;
7631 avc_state->intra_refresh_i_enable = 0;
7632 avc_state->min_max_qp_enable = 0;
7633 avc_state->skip_bias_adjustment_enable = 0;// default; distinct from skip_bias_adjustment_supported below
7636 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7637 avc_state->ftq_skip_threshold_lut_input_enable = 0;
7638 avc_state->ftq_override = 0;
7640 avc_state->direct_bias_adjustment_enable = 0;
7641 avc_state->global_motion_bias_adjustment_enable = 0;
7642 avc_state->disable_sub_mb_partion = 0;
7643 avc_state->arbitrary_num_mbs_in_slice = 0;
7644 avc_state->adaptive_transform_decision_enable = 0;//default
7645 avc_state->skip_check_disable = 0;
7646 avc_state->tq_enable = 0;
7647 avc_state->enable_avc_ildb = 0;
7648 avc_state->mbaff_flag = 0;
7649 avc_state->enable_force_skip = 1;//default
7650 avc_state->rc_panic_enable = 1;//default
7651 avc_state->suppress_recon_enable = 1;//default
7653 avc_state->ref_pic_select_list_supported = 1;
7654 avc_state->mb_brc_supported = 1;//?,default
7655 avc_state->multi_pre_enable = 1;//default
7656 avc_state->ftq_enable = 1;//default
7657 avc_state->caf_supported = 1; //default
7658 avc_state->caf_enable = 0;
7659 avc_state->caf_disable_hd = 1;//default
7660 avc_state->skip_bias_adjustment_supported = 1;//default
7662 avc_state->adaptive_intra_scaling_enable = 1;//default
7663 avc_state->old_mode_cost_enable = 0;//default
7664 avc_state->multi_ref_qp_enable = 1;//default
7665 avc_state->weighted_ref_l0_enable = 1;//default
7666 avc_state->weighted_ref_l1_enable = 1;//default
7667 avc_state->weighted_prediction_supported = 0;
7668 avc_state->brc_split_enable = 0;
7669 avc_state->slice_level_report_supported = 0;
7671 avc_state->fbr_bypass_enable = 1;//default
7672 avc_state->field_scaling_output_interleaved = 0;
7673 avc_state->mb_variance_output_enable = 0;
7674 avc_state->mb_pixel_average_output_enable = 0;
7675 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7676 avc_state->mbenc_curbe_set_in_brc_update = 0;
7677 avc_state->rounding_inter_enable = 1; //default
7678 avc_state->adaptive_rounding_inter_enable = 1;//default
7680 avc_state->mbenc_i_frame_dist_in_use = 0;
7681 avc_state->mb_status_supported = 1; //set in initialization for gen9
7682 avc_state->mb_status_enable = 0;
7683 avc_state->mb_vproc_stats_enable = 0;
7684 avc_state->flatness_check_enable = 0;
7685 avc_state->flatness_check_supported = 1;//default
7686 avc_state->block_based_skip_enable = 0;
7687 avc_state->use_widi_mbenc_kernel = 0;
7688 avc_state->kernel_trellis_enable = 0;
7689 avc_state->generic_reserved = 0;
/* Rounding starts "invalid" so runtime logic can pick real values. */
7691 avc_state->rounding_value = 0;
7692 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7693 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7694 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7695 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7696 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7697 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7698 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7699 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7700 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7702 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7703 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7704 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7706 avc_state->intra_refresh_qp_threshold = 0;
7707 avc_state->trellis_flag = 0;
7708 avc_state->hme_mv_cost_scaling_factor = 0;
7709 avc_state->slice_height = 1;
7710 avc_state->slice_num = 1;
7711 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7712 avc_state->bi_weight = 0;
7714 avc_state->lambda_table_enable = 0;
/* Per-platform BRC constant-data surface geometry and KBL/GLK extras. */
7717 if (IS_SKL(i965->intel.device_info) ||
7718 IS_BXT(i965->intel.device_info)) {
7719 avc_state->brc_const_data_surface_width = 64;
7720 avc_state->brc_const_data_surface_height = 44;
7721 } else if (IS_KBL(i965->intel.device_info) ||
7722 IS_GLK(i965->intel.device_info)) {
7723 avc_state->brc_const_data_surface_width = 64;
7724 avc_state->brc_const_data_surface_height = 53;
7726 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7727 avc_state->extended_mv_cost_range_enable = 0;
7728 avc_state->reserved_g95 = 0;
7729 avc_state->mbenc_brc_buffer_size = 128;
7730 avc_state->kernel_trellis_enable = 1;
7731 avc_state->lambda_table_enable = 1;
7732 avc_state->brc_split_enable = 1;
7735 avc_state->num_refs[0] = 0;
7736 avc_state->num_refs[1] = 0;
7737 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7738 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7739 avc_state->tq_rounding = 0;
7740 avc_state->zero_mv_threshold = 0;
7741 avc_state->slice_second_levle_batch_buffer_in_use = 0;
7745 /* the definition of status buffer offset for Encoder */
7747 status_buffer = &avc_ctx->status_buffer;
7748 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
/* Byte offsets of each field inside the coded-buffer private data. */
7750 status_buffer->base_offset = base_offset;
7751 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7752 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7753 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7754 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7755 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7756 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
/* MMIO register offsets the status fields are captured from. */
7758 status_buffer->status_buffer_size = sizeof(struct encoder_status);
7759 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7760 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7761 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7762 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7763 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load kernels and install the VME pipeline hooks. */
7765 gen9_avc_kernel_init(ctx, encoder_context);
7766 encoder_context->vme_context = vme_context;
7767 encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7768 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* NOTE(review): the sibling free() calls for vme_context/generic_ctx/
 * avc_ctx/avc_state on this error path fall in lines not visible here. */
7772 allocate_structure_failed:
7777 free(generic_state);
7783 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7785 /* VME & PAK share the same context */
7786 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7791 encoder_context->mfc_context = pak_context;
7792 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7793 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7794 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7795 encoder_context->get_status = gen9_avc_get_coded_status;