OSDN Git Service

VC1: Fix for frame coding mode
[android-x86/hardware-intel-common-vaapi.git] / src / i965_avc_encoder.c
1 /*
2  * Copyright @ 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *    Sreerenj Balachandran <sreerenj.balachandran@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
47
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
54
55 #define MAX_URB_SIZE                    4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT     1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 = (1 << 4) */
59
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
61         if (bo) {                                                       \
62             OUT_BCS_RELOC64(batch,                                        \
63                             bo,                                         \
64                             I915_GEM_DOMAIN_INSTRUCTION,                \
65                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
66                             delta);                                     \
67         } else {                                                        \
68             OUT_BCS_BATCH(batch, 0);                                    \
69             OUT_BCS_BATCH(batch, 0);                                    \
70         }                                                               \
71     } while (0)
72
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
74         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
75         OUT_BCS_BATCH(batch, attr);                             \
76     } while (0)
77
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE      64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE      128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE   16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE   48
84 #define FEI_AVC_QP_BUFFER_SIZE           1
85
/* Flat quantisation matrix: every entry is 16, packed as four 8-bit
 * values per dword (16 dwords = 64 bytes). */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
};
92
/* Flat forward quantisation matrix: every entry is 0x1000, packed as two
 * 16-bit values per dword (32 dwords = 128 bytes). */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
};
103
104 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
105
/* Default CURBE payload for the gen9 BRC init/reset kernel.  Each braced
 * group initializes one dword of gen9_avc_brc_init_reset_curbe_data; most
 * fields are zero here and filled in at runtime — only a handful of fixed
 * tuning defaults are non-zero (see notes below). */
static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        0
    },

    // unsigned int 4
    {
        0
    },

    // unsigned int 5
    {
        0
    },

    // unsigned int 6
    {
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        0,
        0
    },

    // unsigned int 9
    {
        0,
        0
    },

    // unsigned int 10
    {
        0,
        0
    },

    // unsigned int 11
    {
        0,
        1
    },

    // unsigned int 12
    {
        51, /* NOTE(review): 51 matches the maximum H.264 QP — confirm field name against the curbe struct */
        0
    },

    /* DW13-DW15: fixed threshold tables — TODO confirm semantics against
     * gen9_avc_brc_init_reset_curbe_data. */
    // unsigned int 13
    {
        40,
        60,
        80,
        120
    },

    // unsigned int 14
    {
        35,
        60,
        80,
        120
    },

    // unsigned int 15
    {
        40,
        60,
        90,
        115
    },

    // unsigned int 16
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 17
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 18
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 19
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 20
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 21
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 22
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 23
    {
        0
    }
};
262
/* Default CURBE payload for the gen8 frame-level BRC update kernel.  Each
 * braced group initializes one dword of gen8_avc_frame_brc_update_curbe_data;
 * the non-zero entries are fixed tuning defaults, the rest are set at
 * runtime before the kernel is dispatched. */
static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
    //unsigned int 0
    {
        0
    },

    //unsigned int 1
    {
        0
    },

    //unsigned int 2
    {
        0
    },

    //unsigned int 3
    {

        10,
        50

    },

    //unsigned int 4
    {

        100,
        150

    },

    //unsigned int 5
    {
        0, 0, 0, 0
    },

    //unsigned int 6
    {
        0, 0, 0, 0
    },

    //unsigned int 7
    {
        0
    },

    //unsigned int 8
    {

        1,
        1,
        3,
        2

    },

    //unsigned int 9
    {

        1,
        40,
        5,
        5

    },

    //unsigned int 10
    {

        3,
        1,
        7,
        18

    },

    //unsigned int 11
    {

        25,
        37,
        40,
        75

    },

    //unsigned int 12
    {

        97,
        103,
        125,
        160

    },

    /* DW13: signed adjustment table (negative deltas) — TODO confirm field
     * names against the curbe struct. */
    //unsigned int 13
    {

        -3,
        -2,
        -1,
        0

    },

    //unsigned int 14
    {

        1,
        2,
        3,
        0xff

    },

    //unsigned int 15
    {
        0, 0
    },

    //unsigned int 16
    {
        0, 0
    },

    //unsigned int 17
    {
        0, 0
    },
};
/* Default CURBE payload for the gen9 frame-level BRC update kernel.  Mirrors
 * the gen8 defaults above but follows the gen9 curbe layout
 * (gen9_avc_frame_brc_update_curbe_data); zero fields are filled in at
 * runtime. */
static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        10,
        50
    },

    // unsigned int 4
    {
        100,
        150
    },

    // unsigned int 5
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 6
    {
        0,
        0,
        0,
        0,
        0,
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        1,
        1,
        3,
        2
    },

    // unsigned int 9
    {
        1,
        40,
        5,
        5
    },

    // unsigned int 10
    {
        3,
        1,
        7,
        18
    },

    // unsigned int 11
    {
        25,
        37,
        40,
        75
    },

    // unsigned int 12
    {
        97,
        103,
        125,
        160
    },

    /* DW13: signed adjustment table (negative deltas) — TODO confirm field
     * names against the curbe struct. */
    // unsigned int 13
    {
        -3,
        -2,
        -1,
        0
    },

    // unsigned int 14
    {
        1,
        2,
        3,
        0xff
    },

    // unsigned int 15
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 16
    {
        0
    },

    // unsigned int 17
    {
        0
    },

    // unsigned int 18
    {
        0
    },

    // unsigned int 19
    {
        0
    },

    // unsigned int 20
    {
        0
    },

    // unsigned int 21
    {
        0
    },

    // unsigned int 22
    {
        0
    },

    // unsigned int 23
    {
        0
    },

};
551
552 static void
553 gen9_avc_update_misc_parameters(VADriverContextP ctx,
554                                 struct encode_state *encode_state,
555                                 struct intel_encoder_context *encoder_context)
556 {
557     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
558     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
559     int i;
560
561     /* brc */
562     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
563
564     generic_state->brc_need_reset = encoder_context->brc.need_reset;
565
566     if (generic_state->internal_rate_mode == VA_RC_CBR) {
567         generic_state->min_bit_rate = generic_state->max_bit_rate;
568         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
569
570         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
571             generic_state->target_bit_rate = generic_state->max_bit_rate;
572             generic_state->brc_need_reset = 1;
573         }
574     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
575         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
576         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
577
578         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
579             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
580             generic_state->brc_need_reset = 1;
581         }
582     }
583
584     /*  frame rate */
585     if (generic_state->internal_rate_mode != VA_RC_CQP) {
586         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
587         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
588         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
589     } else {
590         generic_state->frames_per_100s = 30 * 100;
591         generic_state->frame_rate = 30 ;
592         generic_state->frames_per_window_size = 30;
593     }
594
595     /*  HRD */
596     if (generic_state->internal_rate_mode != VA_RC_CQP) {
597         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
598         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
599     }
600
601     /* ROI */
602     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
603     if (generic_state->num_roi > 0) {
604         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
605         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
606
607         for (i = 0; i < generic_state->num_roi; i++) {
608             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
609             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
610             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
611             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
612             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
613
614             generic_state->roi[i].left /= 16;
615             generic_state->roi[i].right /= 16;
616             generic_state->roi[i].top /= 16;
617             generic_state->roi[i].bottom /= 16;
618         }
619     }
620
621 }
622
623 static bool
624 intel_avc_get_kernel_header_and_size(void *pvbinary,
625                                      int binary_size,
626                                      INTEL_GENERIC_ENC_OPERATION operation,
627                                      int krnstate_idx,
628                                      struct i965_kernel *ret_kernel)
629 {
630     typedef uint32_t BIN_PTR[4];
631
632     char *bin_start;
633     gen9_avc_encoder_kernel_header      *pkh_table;
634     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
635     int next_krnoffset;
636
637     if (!pvbinary || !ret_kernel)
638         return false;
639
640     bin_start = (char *)pvbinary;
641     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
642     pinvalid_entry = &(pkh_table->static_detection) + 1;
643     next_krnoffset = binary_size;
644
645     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
646         pcurr_header = &pkh_table->ply_dscale_ply;
647     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
648         pcurr_header = &pkh_table->ply_2xdscale_ply;
649     } else if (operation == INTEL_GENERIC_ENC_ME) {
650         pcurr_header = &pkh_table->me_p;
651     } else if (operation == INTEL_GENERIC_ENC_BRC) {
652         pcurr_header = &pkh_table->frame_brc_init;
653     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
654         pcurr_header = &pkh_table->mbenc_quality_I;
655     } else if (operation == INTEL_GENERIC_ENC_WP) {
656         pcurr_header = &pkh_table->wp;
657     } else if (operation == INTEL_GENERIC_ENC_SFD) {
658         pcurr_header = &pkh_table->static_detection;
659     } else {
660         return false;
661     }
662
663     pcurr_header += krnstate_idx;
664     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
665
666     pnext_header = (pcurr_header + 1);
667     if (pnext_header < pinvalid_entry) {
668         next_krnoffset = pnext_header->kernel_start_pointer << 6;
669     }
670     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
671
672     return true;
673 }
674
675 static bool
676 intel_avc_fei_get_kernel_header_and_size(
677     void                             *pvbinary,
678     int                              binary_size,
679     INTEL_GENERIC_ENC_OPERATION      operation,
680     int                              krnstate_idx,
681     struct i965_kernel               *ret_kernel)
682 {
683     typedef uint32_t BIN_PTR[4];
684
685     char *bin_start;
686     gen9_avc_fei_encoder_kernel_header      *pkh_table;
687     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
688     int next_krnoffset;
689
690     if (!pvbinary || !ret_kernel)
691         return false;
692
693     bin_start = (char *)pvbinary;
694     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
695     pinvalid_entry = &(pkh_table->ply_2xdscale_2f_ply_2f) + 1;
696     next_krnoffset = binary_size;
697
698     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
699         pcurr_header = &pkh_table->ply_2xdscale_ply;
700     } else if (operation == INTEL_GENERIC_ENC_ME) {
701         pcurr_header = &pkh_table->me_p;
702     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
703         pcurr_header = &pkh_table->mbenc_i;
704     } else {
705         return false;
706     }
707
708     pcurr_header += krnstate_idx;
709     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
710
711     pnext_header = (pcurr_header + 1);
712     if (pnext_header < pinvalid_entry) {
713         next_krnoffset = pnext_header->kernel_start_pointer << 6;
714     }
715     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
716
717     return true;
718 }
719
720 static void
721 gen9_free_surfaces_avc(void **data)
722 {
723     struct gen9_surface_avc *avc_surface;
724
725     if (!data || !*data)
726         return;
727
728     avc_surface = *data;
729
730     if (avc_surface->scaled_4x_surface_obj) {
731         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
732         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
733         avc_surface->scaled_4x_surface_obj = NULL;
734     }
735
736     if (avc_surface->scaled_16x_surface_obj) {
737         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
738         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
739         avc_surface->scaled_16x_surface_obj = NULL;
740     }
741
742     if (avc_surface->scaled_32x_surface_obj) {
743         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
744         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
745         avc_surface->scaled_32x_surface_obj = NULL;
746     }
747
748     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
749     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
750     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
751
752     /* FEI specific resources */
753     /* since the driver previously taken an extra reference to the drm_bo
754      * in case the buffers were supplied by middleware, there shouldn't
755      * be any memory handling issue */
756     i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
757     i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
758     i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
759     i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
760
761     dri_bo_unreference(avc_surface->dmv_top);
762     avc_surface->dmv_top = NULL;
763     dri_bo_unreference(avc_surface->dmv_bottom);
764     avc_surface->dmv_bottom = NULL;
765
766     free(avc_surface);
767
768     *data = NULL;
769
770     return;
771 }
772
/* Lazily create the per-surface encoder state (gen9_surface_avc) for an
 * input/reconstructed surface: the 4x/16x (and optionally 32x) downscaled
 * surfaces for HME, the MB-code/MV-data buffers (non-FEI only), the
 * ref-pic-select surface and the direct-MV bos.  Returns VA_STATUS_SUCCESS
 * immediately if the surface already carries private data.
 *
 * Note on failure paths: obj_surface->free_private_data is installed before
 * any allocation, so partially-allocated state is reclaimed when the surface
 * is destroyed — the early error returns below do not leak. */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    /* Frame dimensions in 16x16 macroblock units. */
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* Already initialized on a previous frame. */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Attach the state and its destructor before allocating anything, so
     * surface destruction cleans up even if we fail part-way through. */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    /* 4x downscaled surface (NV12) for HME. */
    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 16x downscaled surface (NV12). */
    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 32x downscaled surface, only when 32x ME is supported/enabled. */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /*mb code and mv data for each frame*/
    /* In FEI mode these buffers come from the middleware instead. */
    if (!encoder_context->fei_enabled) {
        size = frame_mb_nums * 16 * 4;  /* 64 bytes per MB */
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mb_code_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mb code buffer");
        if (!allocate_flag)
            goto failed_allocation;

        size = frame_mb_nums * 32 * 4;  /* 128 bytes per MB */
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mv_data_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mv data buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /* ref pic list*/
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /*direct mv*/
    /* 68 bytes per MB for each field polarity. */
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    /* Partially-allocated resources are released by free_private_data. */
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
918
/* Build the MB-to-slice map surface used by the MBENC kernel when slices
 * contain an arbitrary number of macroblocks: each dword in a row holds the
 * slice index of the corresponding MB, rows are 'pitch' dwords apart, and
 * the map is terminated with 0xFFFFFFFF.  No-op unless
 * arbitrary_num_mbs_in_slice is set. */
static void
gen9_avc_generate_slice_map(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    VAEncSliceParameterBufferH264 * slice_param = NULL;
    unsigned int * data = NULL;
    unsigned int * data_row = NULL;
    int i, j, count = 0;
    /* Surface row pitch in dwords: (width_in_mbs + 1) dwords, byte-aligned
     * to 64 and converted back to dword units. */
    unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;

    if (!avc_state->arbitrary_num_mbs_in_slice)
        return;

    gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
    assert(data_row);

    data = data_row;
    /* Walk every MB of every slice in raster order, writing the slice
     * index; whenever 'count' crosses a frame-width boundary, jump to the
     * start of the next surface row and rewrite the entry there. */
    for (i = 0; i < avc_state->slice_num; i++) {
        slice_param = avc_state->slice_param[i];
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            *data++ = i;
            if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
                data_row += pitch;
                data = data_row;
                *data++ = i;
            }
            count++;
        }
    }
    /* End-of-map marker. */
    *data++ = 0xFFFFFFFF;

    i965_unmap_gpe_resource(gpe_resource);
}
964
965 static VAStatus
966 gen9_avc_allocate_resources(VADriverContextP ctx,
967                             struct encode_state *encode_state,
968                             struct intel_encoder_context *encoder_context)
969 {
970     struct i965_driver_data *i965 = i965_driver_data(ctx);
971     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
972     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
973     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
974     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
975     unsigned int size  = 0;
976     unsigned int width  = 0;
977     unsigned int height  = 0;
978     unsigned char * data  = NULL;
979     int allocate_flag = 1;
980     int i = 0;
981
982     /*all the surface/buffer are allocated here*/
983
984     /*second level batch buffer for image state write when cqp etc*/
985     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
986     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
987     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
988                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
989                                                ALIGN(size, 0x1000),
990                                                "second levle batch (image state write) buffer");
991     if (!allocate_flag)
992         goto failed_allocation;
993
994     /* scaling related surface   */
995     if (avc_state->mb_status_supported) {
996         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
997         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
998         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
999                                                    &avc_ctx->res_mb_status_buffer,
1000                                                    ALIGN(size, 0x1000),
1001                                                    "MB statistics output buffer");
1002         if (!allocate_flag)
1003             goto failed_allocation;
1004         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1005     }
1006
1007     if (avc_state->flatness_check_supported) {
1008         width = generic_state->frame_width_in_mbs * 4;
1009         height = generic_state->frame_height_in_mbs * 4;
1010         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1011         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1012                                                       &avc_ctx->res_flatness_check_surface,
1013                                                       width, height,
1014                                                       ALIGN(width, 64),
1015                                                       "Flatness check buffer");
1016         if (!allocate_flag)
1017             goto failed_allocation;
1018     }
1019     /* me related surface */
1020     width = generic_state->downscaled_width_4x_in_mb * 8;
1021     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1022     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1023     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1024                                                   &avc_ctx->s4x_memv_distortion_buffer,
1025                                                   width, height,
1026                                                   ALIGN(width, 64),
1027                                                   "4x MEMV distortion buffer");
1028     if (!allocate_flag)
1029         goto failed_allocation;
1030     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1031
1032     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1033     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1034     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1035     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1036                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1037                                                   width, height,
1038                                                   width,
1039                                                   "4x MEMV min distortion brc buffer");
1040     if (!allocate_flag)
1041         goto failed_allocation;
1042     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1043
1044
1045     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1046     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1047     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1048     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1049                                                   &avc_ctx->s4x_memv_data_buffer,
1050                                                   width, height,
1051                                                   width,
1052                                                   "4x MEMV data buffer");
1053     if (!allocate_flag)
1054         goto failed_allocation;
1055     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1056
1057
1058     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1059     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1060     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1061     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1062                                                   &avc_ctx->s16x_memv_data_buffer,
1063                                                   width, height,
1064                                                   width,
1065                                                   "16x MEMV data buffer");
1066     if (!allocate_flag)
1067         goto failed_allocation;
1068     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1069
1070
1071     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1072     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1073     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1074     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1075                                                   &avc_ctx->s32x_memv_data_buffer,
1076                                                   width, height,
1077                                                   width,
1078                                                   "32x MEMV data buffer");
1079     if (!allocate_flag)
1080         goto failed_allocation;
1081     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1082
1083
1084     if (!generic_state->brc_allocated) {
1085         /*brc related surface */
1086         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1087         size = 864;
1088         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1089                                                    &avc_ctx->res_brc_history_buffer,
1090                                                    ALIGN(size, 0x1000),
1091                                                    "brc history buffer");
1092         if (!allocate_flag)
1093             goto failed_allocation;
1094
1095         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1096         size = 64;//44
1097         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1098                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1099                                                    ALIGN(size, 0x1000),
1100                                                    "brc pak statistic buffer");
1101         if (!allocate_flag)
1102             goto failed_allocation;
1103
1104         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1105         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1106         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1107                                                    &avc_ctx->res_brc_image_state_read_buffer,
1108                                                    ALIGN(size, 0x1000),
1109                                                    "brc image state read buffer");
1110         if (!allocate_flag)
1111             goto failed_allocation;
1112
1113         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1114         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1115         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1116                                                    &avc_ctx->res_brc_image_state_write_buffer,
1117                                                    ALIGN(size, 0x1000),
1118                                                    "brc image state write buffer");
1119         if (!allocate_flag)
1120             goto failed_allocation;
1121
1122         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1123         height = avc_state->brc_const_data_surface_height;
1124         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1125         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1126                                                       &avc_ctx->res_brc_const_data_buffer,
1127                                                       width, height,
1128                                                       width,
1129                                                       "brc const data buffer");
1130         if (!allocate_flag)
1131             goto failed_allocation;
1132         i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1133
1134         if (generic_state->brc_distortion_buffer_supported) {
1135             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1136             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1137             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1138             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1139             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1140             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1141                                                           &avc_ctx->res_brc_dist_data_surface,
1142                                                           width, height,
1143                                                           width,
1144                                                           "brc dist data buffer");
1145             if (!allocate_flag)
1146                 goto failed_allocation;
1147             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1148         }
1149
1150         if (generic_state->brc_roi_enable) {
1151             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1152             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1153             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1154             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1155                                                           &avc_ctx->res_mbbrc_roi_surface,
1156                                                           width, height,
1157                                                           width,
1158                                                           "mbbrc roi buffer");
1159             if (!allocate_flag)
1160                 goto failed_allocation;
1161             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1162         }
1163
1164         /*mb qp in mb brc*/
1165         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1166         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1167         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1168         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1169                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
1170                                                       width, height,
1171                                                       width,
1172                                                       "mbbrc mb qp buffer");
1173         if (!allocate_flag)
1174             goto failed_allocation;
1175
1176         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1177         size = 16 * AVC_QP_MAX * 4;
1178         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1179                                                    &avc_ctx->res_mbbrc_const_data_buffer,
1180                                                    ALIGN(size, 0x1000),
1181                                                    "mbbrc const data buffer");
1182         if (!allocate_flag)
1183             goto failed_allocation;
1184
1185         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1186             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1187             size = avc_state->mbenc_brc_buffer_size;
1188             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1189                                                        &avc_ctx->res_mbenc_brc_buffer,
1190                                                        ALIGN(size, 0x1000),
1191                                                        "mbenc brc buffer");
1192             if (!allocate_flag)
1193                 goto failed_allocation;
1194             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1195         }
1196         generic_state->brc_allocated = 1;
1197     }
1198
1199     /*mb qp external*/
1200     if (avc_state->mb_qp_data_enable) {
1201         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1202         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1203         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1204         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1205                                                       &avc_ctx->res_mb_qp_data_surface,
1206                                                       width, height,
1207                                                       width,
1208                                                       "external mb qp buffer");
1209         if (!allocate_flag)
1210             goto failed_allocation;
1211     }
1212
1213     /*     mbenc related surface. it share most of surface with other kernels     */
1214     if (avc_state->arbitrary_num_mbs_in_slice) {
1215         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1216         height = generic_state->frame_height_in_mbs ;
1217         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1218         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1219                                                       &avc_ctx->res_mbenc_slice_map_surface,
1220                                                       width, height,
1221                                                       width,
1222                                                       "slice map buffer");
1223         if (!allocate_flag)
1224             goto failed_allocation;
1225         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1226
1227         /*generate slice map,default one slice per frame.*/
1228     }
1229
1230     /* sfd related surface  */
1231     if (avc_state->sfd_enable) {
1232         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1233         size = 128;
1234         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1235                                                    &avc_ctx->res_sfd_output_buffer,
1236                                                    size,
1237                                                    "sfd output buffer");
1238         if (!allocate_flag)
1239             goto failed_allocation;
1240         i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1241
1242         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1243         size = ALIGN(52, 64);
1244         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1245                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1246                                                    size,
1247                                                    "sfd P frame cost table buffer");
1248         if (!allocate_flag)
1249             goto failed_allocation;
1250         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1251         assert(data);
1252         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1253         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1254
1255         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1256         size = ALIGN(52, 64);
1257         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1258                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1259                                                    size,
1260                                                    "sfd B frame cost table buffer");
1261         if (!allocate_flag)
1262             goto failed_allocation;
1263         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1264         assert(data);
1265         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1266         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1267     }
1268
1269     /* wp related surfaces */
1270     if (avc_state->weighted_prediction_supported) {
1271         for (i = 0; i < 2 ; i++) {
1272             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1273                 continue;
1274             }
1275
1276             width = generic_state->frame_width_in_pixel;
1277             height = generic_state->frame_height_in_pixel ;
1278             i965_CreateSurfaces(ctx,
1279                                 width,
1280                                 height,
1281                                 VA_RT_FORMAT_YUV420,
1282                                 1,
1283                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1284             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1285
1286             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1287                 goto failed_allocation;
1288             }
1289
1290             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1291                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1292         }
1293         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1294         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1295         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1296         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1297     }
1298
1299     /* other   */
1300
1301     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1302     size = 4 * 1;
1303     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1304                                                &avc_ctx->res_mad_data_buffer,
1305                                                ALIGN(size, 0x1000),
1306                                                "MAD data buffer");
1307     if (!allocate_flag)
1308         goto failed_allocation;
1309
1310     return VA_STATUS_SUCCESS;
1311
1312 failed_allocation:
1313     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1314 }
1315
1316 static void
1317 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1318 {
1319     if (!vme_context)
1320         return;
1321
1322     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1323     VADriverContextP ctx = avc_ctx->ctx;
1324     int i = 0;
1325
1326     /* free all the surface/buffer here*/
1327     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1328     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1329     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1330     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1331     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1332     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1333     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1334     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1335     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1336     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1337     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1338     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1339     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1340     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1341     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1342     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1343     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1344     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1345     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1346     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1347     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1348     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1349     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1350     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1351     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1352     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1353
1354     for (i = 0; i < 2 ; i++) {
1355         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1356             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1357             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1358             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1359         }
1360     }
1361
1362 }
1363
1364 static void
1365 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1366                                  struct intel_encoder_context *encoder_context,
1367                                  struct i965_gpe_context *gpe_context,
1368                                  int media_function,
1369                                  struct gpe_media_object_parameter *param)
1370 {
1371     struct i965_driver_data *i965 = i965_driver_data(ctx);
1372     struct i965_gpe_table *gpe = &i965->gpe_table;
1373     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1374     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1375
1376     struct intel_batchbuffer *batch = encoder_context->base.batch;
1377     struct encoder_status_buffer_internal *status_buffer;
1378     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1379
1380     if (!batch)
1381         return;
1382
1383     intel_batchbuffer_start_atomic(batch, 0x1000);
1384     intel_batchbuffer_emit_mi_flush(batch);
1385
1386     status_buffer = &(avc_ctx->status_buffer);
1387     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1388     mi_store_data_imm.bo = status_buffer->bo;
1389     mi_store_data_imm.offset = status_buffer->media_index_offset;
1390     mi_store_data_imm.dw0 = media_function;
1391     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1392
1393     gpe->pipeline_setup(ctx, gpe_context, batch);
1394     gpe->media_object(ctx, gpe_context, batch, param);
1395     gpe->media_state_flush(ctx, gpe_context, batch);
1396
1397     gpe->pipeline_end(ctx, gpe_context, batch);
1398
1399     intel_batchbuffer_end_atomic(batch);
1400
1401     intel_batchbuffer_flush(batch);
1402 }
1403
1404 static void
1405 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1406                                         struct intel_encoder_context *encoder_context,
1407                                         struct i965_gpe_context *gpe_context,
1408                                         int media_function,
1409                                         struct gpe_media_object_walker_parameter *param)
1410 {
1411     struct i965_driver_data *i965 = i965_driver_data(ctx);
1412     struct i965_gpe_table *gpe = &i965->gpe_table;
1413     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1414     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1415
1416     struct intel_batchbuffer *batch = encoder_context->base.batch;
1417     struct encoder_status_buffer_internal *status_buffer;
1418     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1419
1420     if (!batch)
1421         return;
1422
1423     intel_batchbuffer_start_atomic(batch, 0x1000);
1424
1425     intel_batchbuffer_emit_mi_flush(batch);
1426
1427     status_buffer = &(avc_ctx->status_buffer);
1428     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1429     mi_store_data_imm.bo = status_buffer->bo;
1430     mi_store_data_imm.offset = status_buffer->media_index_offset;
1431     mi_store_data_imm.dw0 = media_function;
1432     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1433
1434     gpe->pipeline_setup(ctx, gpe_context, batch);
1435     gpe->media_object_walker(ctx, gpe_context, batch, param);
1436     gpe->media_state_flush(ctx, gpe_context, batch);
1437
1438     gpe->pipeline_end(ctx, gpe_context, batch);
1439
1440     intel_batchbuffer_end_atomic(batch);
1441
1442     intel_batchbuffer_flush(batch);
1443 }
1444
1445 static void
1446 gen9_init_gpe_context_avc(VADriverContextP ctx,
1447                           struct i965_gpe_context *gpe_context,
1448                           struct encoder_kernel_parameter *kernel_param)
1449 {
1450     struct i965_driver_data *i965 = i965_driver_data(ctx);
1451
1452     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1453
1454     gpe_context->sampler.entry_size = 0;
1455     gpe_context->sampler.max_entries = 0;
1456
1457     if (kernel_param->sampler_size) {
1458         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1459         gpe_context->sampler.max_entries = 1;
1460     }
1461
1462     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1463     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1464
1465     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1466     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1467     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1468     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1469
1470     if (i965->intel.eu_total > 0)
1471         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1472     else
1473         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1474
1475     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1476     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1477     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1478                                               gpe_context->vfe_state.curbe_allocation_size -
1479                                               ((gpe_context->idrt.entry_size >> 5) *
1480                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1481     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1482     gpe_context->vfe_state.gpgpu_mode = 0;
1483 }
1484
1485 static void
1486 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1487                              struct encoder_scoreboard_parameter *scoreboard_param)
1488 {
1489     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1490     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1491     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1492
1493     if (scoreboard_param->walkpat_flag) {
1494         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1495         gpe_context->vfe_desc5.scoreboard0.type = 1;
1496
1497         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1498         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1499
1500         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1501         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1502
1503         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1504         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1505
1506         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1507         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1508     } else {
1509         // Scoreboard 0
1510         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1511         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1512
1513         // Scoreboard 1
1514         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1515         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1516
1517         // Scoreboard 2
1518         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1519         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1520
1521         // Scoreboard 3
1522         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1523         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1524
1525         // Scoreboard 4
1526         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1527         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1528
1529         // Scoreboard 5
1530         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1531         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1532
1533         // Scoreboard 6
1534         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1535         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1536
1537         // Scoreboard 7
1538         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1539         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1540     }
1541 }
1542 /*
1543 VME pipeline related function
1544 */
1545
1546 /*
1547 scaling kernel related function
1548 */
1549 static void
1550 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1551                              struct encode_state *encode_state,
1552                              struct i965_gpe_context *gpe_context,
1553                              struct intel_encoder_context *encoder_context,
1554                              void *param)
1555 {
1556     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1557     struct scaling_param *surface_param = (struct scaling_param *)param;
1558
1559     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1560
1561     if (!curbe_cmd)
1562         return;
1563
1564     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1565
1566     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1567     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1568
1569     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1570     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1571
1572
1573     curbe_cmd->dw5.flatness_threshold = 128;
1574     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1575     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1576     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1577
1578     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1579         curbe_cmd->dw7.enable_mb_variance_output ||
1580         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1581         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1582     }
1583
1584     i965_gpe_context_unmap_curbe(gpe_context);
1585     return;
1586 }
1587
1588 static void
1589 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1590                               struct encode_state *encode_state,
1591                               struct i965_gpe_context *gpe_context,
1592                               struct intel_encoder_context *encoder_context,
1593                               void *param)
1594 {
1595     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1596     struct scaling_param *surface_param = (struct scaling_param *)param;
1597
1598     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1599
1600     if (!curbe_cmd)
1601         return;
1602
1603     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1604
1605     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1606     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1607
1608     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1609     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1610
1611     if (surface_param->enable_mb_flatness_check)
1612         curbe_cmd->dw5.flatness_threshold = 128;
1613     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1614     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1615     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1616     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1617
1618     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1619         curbe_cmd->dw6.enable_mb_variance_output ||
1620         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1621         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1622     }
1623
1624     i965_gpe_context_unmap_curbe(gpe_context);
1625     return;
1626 }
1627
1628 static void
1629 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1630                              struct encode_state *encode_state,
1631                              struct i965_gpe_context *gpe_context,
1632                              struct intel_encoder_context *encoder_context,
1633                              void *param)
1634 {
1635     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1636     struct scaling_param *surface_param = (struct scaling_param *)param;
1637
1638     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1639
1640     if (!curbe_cmd)
1641         return;
1642
1643     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1644
1645     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1646     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1647
1648     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1649     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1650
1651     i965_gpe_context_unmap_curbe(gpe_context);
1652     return;
1653 }
1654
1655 static void
1656 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1657                               struct encode_state *encode_state,
1658                               struct i965_gpe_context *gpe_context,
1659                               struct intel_encoder_context *encoder_context,
1660                               void *param)
1661 {
1662     struct scaling_param *surface_param = (struct scaling_param *)param;
1663     struct i965_driver_data *i965 = i965_driver_data(ctx);
1664     unsigned int surface_format;
1665     unsigned int res_size;
1666
1667     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1668         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1669     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1670         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1671     else
1672         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1673
1674     i965_add_2d_gpe_surface(ctx, gpe_context,
1675                             surface_param->input_surface,
1676                             0, 1, surface_format,
1677                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1678
1679     i965_add_2d_gpe_surface(ctx, gpe_context,
1680                             surface_param->output_surface,
1681                             0, 1, surface_format,
1682                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1683
1684     /*add buffer mv_proc_stat, here need change*/
1685     if (IS_GEN8(i965->intel.device_info)) {
1686         if (surface_param->mbv_proc_stat_enabled) {
1687             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1688
1689             i965_add_buffer_gpe_surface(ctx,
1690                                         gpe_context,
1691                                         surface_param->pres_mbv_proc_stat_buffer,
1692                                         0,
1693                                         res_size / 4,
1694                                         0,
1695                                         GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1696         }
1697         if (surface_param->enable_mb_flatness_check) {
1698             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1699                                            surface_param->pres_flatness_check_surface,
1700                                            1,
1701                                            I965_SURFACEFORMAT_R8_UNORM,
1702                                            GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1703         }
1704     } else {
1705         if (surface_param->mbv_proc_stat_enabled) {
1706             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1707
1708             i965_add_buffer_gpe_surface(ctx,
1709                                         gpe_context,
1710                                         surface_param->pres_mbv_proc_stat_buffer,
1711                                         0,
1712                                         res_size / 4,
1713                                         0,
1714                                         GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1715         } else if (surface_param->enable_mb_flatness_check) {
1716             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1717                                            surface_param->pres_flatness_check_surface,
1718                                            1,
1719                                            I965_SURFACEFORMAT_R8_UNORM,
1720                                            GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1721         }
1722     }
1723     return;
1724 }
1725
1726 static VAStatus
1727 gen9_avc_kernel_scaling(VADriverContextP ctx,
1728                         struct encode_state *encode_state,
1729                         struct intel_encoder_context *encoder_context,
1730                         int hme_type)
1731 {
1732     struct i965_driver_data *i965 = i965_driver_data(ctx);
1733     struct i965_gpe_table *gpe = &i965->gpe_table;
1734     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1735     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1736     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1737     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1738     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1739
1740     struct i965_gpe_context *gpe_context;
1741     struct scaling_param surface_param;
1742     struct object_surface *obj_surface;
1743     struct gen9_surface_avc *avc_priv_surface;
1744     struct gpe_media_object_walker_parameter media_object_walker_param;
1745     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1746     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1747     int media_function = 0;
1748     int kernel_idx = 0;
1749
1750     obj_surface = encode_state->reconstructed_object;
1751     avc_priv_surface = obj_surface->private_data;
1752
1753     memset(&surface_param, 0, sizeof(struct scaling_param));
1754     switch (hme_type) {
1755     case INTEL_ENC_HME_4x : {
1756         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1757         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1758         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1759         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1760
1761         surface_param.input_surface = encode_state->input_yuv_object ;
1762         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1763         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1764
1765         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1766         surface_param.output_frame_width = generic_state->frame_width_4x ;
1767         surface_param.output_frame_height = generic_state->frame_height_4x ;
1768
1769         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1770         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1771         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1772
1773         surface_param.blk8x8_stat_enabled = 0 ;
1774         surface_param.use_4x_scaling  = 1 ;
1775         surface_param.use_16x_scaling = 0 ;
1776         surface_param.use_32x_scaling = 0 ;
1777         break;
1778     }
1779     case INTEL_ENC_HME_16x : {
1780         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1781         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1782         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1783         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1784
1785         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1786         surface_param.input_frame_width = generic_state->frame_width_4x ;
1787         surface_param.input_frame_height = generic_state->frame_height_4x ;
1788
1789         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1790         surface_param.output_frame_width = generic_state->frame_width_16x ;
1791         surface_param.output_frame_height = generic_state->frame_height_16x ;
1792
1793         surface_param.enable_mb_flatness_check = 0 ;
1794         surface_param.enable_mb_variance_output = 0 ;
1795         surface_param.enable_mb_pixel_average_output = 0 ;
1796
1797         surface_param.blk8x8_stat_enabled = 0 ;
1798         surface_param.use_4x_scaling  = 0 ;
1799         surface_param.use_16x_scaling = 1 ;
1800         surface_param.use_32x_scaling = 0 ;
1801
1802         break;
1803     }
1804     case INTEL_ENC_HME_32x : {
1805         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1806         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1807         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1808         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1809
1810         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1811         surface_param.input_frame_width = generic_state->frame_width_16x ;
1812         surface_param.input_frame_height = generic_state->frame_height_16x ;
1813
1814         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1815         surface_param.output_frame_width = generic_state->frame_width_32x ;
1816         surface_param.output_frame_height = generic_state->frame_height_32x ;
1817
1818         surface_param.enable_mb_flatness_check = 0 ;
1819         surface_param.enable_mb_variance_output = 0 ;
1820         surface_param.enable_mb_pixel_average_output = 0 ;
1821
1822         surface_param.blk8x8_stat_enabled = 0 ;
1823         surface_param.use_4x_scaling  = 0 ;
1824         surface_param.use_16x_scaling = 0 ;
1825         surface_param.use_32x_scaling = 1 ;
1826         break;
1827     }
1828     default :
1829         assert(0);
1830
1831     }
1832
1833     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1834
1835     gpe->context_init(ctx, gpe_context);
1836     gpe->reset_binding_table(ctx, gpe_context);
1837
1838     if (surface_param.use_32x_scaling) {
1839         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1840     } else {
1841         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1842     }
1843
1844     if (surface_param.use_32x_scaling) {
1845         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1846         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1847     } else {
1848         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1849         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1850     }
1851
1852     if (surface_param.use_4x_scaling) {
1853         if (avc_state->mb_status_supported) {
1854             surface_param.enable_mb_flatness_check = 0;
1855             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1856             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1857
1858         } else {
1859             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1860             surface_param.mbv_proc_stat_enabled = 0 ;
1861             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1862         }
1863     }
1864
1865     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1866
1867     /* setup the interface data */
1868     gpe->setup_interface_data(ctx, gpe_context);
1869
1870     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1871     if (surface_param.use_32x_scaling) {
1872         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1873         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1874     } else {
1875         /* the scaling is based on 8x8 blk level */
1876         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1877         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1878     }
1879     kernel_walker_param.no_dependency = 1;
1880
1881     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1882
1883     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1884                                             gpe_context,
1885                                             media_function,
1886                                             &media_object_walker_param);
1887
1888     return VA_STATUS_SUCCESS;
1889 }
1890
/*
 * Frame-level and MB-level BRC (bitrate control) related functions.
 */
/*
 * Populate a Gen9 MFX_AVC_IMG_STATE command structure (pstate) from the
 * current sequence/picture parameters.
 *
 * The filled structure is later copied into per-PAK-pass image-state
 * buffers (gen9_avc_set_image_state*), which patch the pass-dependent
 * bits (macroblock_stat_enable, non_first_pass_flag, ...) afterwards.
 * Only progressive frame coding is programmed here: image_structure and
 * field_picture_flag are hard-coded to 0.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header; length is in dwords, excluding the first two. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* Picture geometry in macroblock units. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* Coding tool flags mirrored from the SPS/PPS fields. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) is only enabled together with CABAC. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* Per-MB size limits in bits. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* Frame bitrate min/max window; max is the field's all-ones value. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set parameters DW19/DW20 for slices */
}
1984
/*
 * Populate a Gen8 MFX_AVC_IMG_STATE command structure (pstate) from the
 * current sequence/picture parameters.
 *
 * Gen8 counterpart of gen9_avc_init_mfx_avc_img_state(): the layout
 * differs slightly (inter/intra_mb_conf_flag instead of the Gen9 BRC
 * domain-rate-control bit, no dw12).  Per-pass bits are patched later by
 * gen8_avc_set_image_state().
 */
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header; length is in dwords, excluding the first two. */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    /* Picture geometry in macroblock units. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    /* Conformance flags start disabled; set for non-first PAK passes. */
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* Coding tool flags mirrored from the SPS/PPS fields. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) is only enabled together with CABAC. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* Per-MB size limits in bits. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* Frame bitrate min/max window; max is the field's all-ones value. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set parameters DW19/DW20 for slices */
}
2074 void gen9_avc_set_image_state(VADriverContextP ctx,
2075                               struct encode_state *encode_state,
2076                               struct intel_encoder_context *encoder_context,
2077                               struct i965_gpe_resource *gpe_resource)
2078 {
2079     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2080     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2081     char *pdata;
2082     int i;
2083     unsigned int * data;
2084     struct gen9_mfx_avc_img_state cmd;
2085
2086     pdata = i965_map_gpe_resource(gpe_resource);
2087
2088     if (!pdata)
2089         return;
2090
2091     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2092     for (i = 0; i < generic_state->num_pak_passes; i++) {
2093
2094         if (i == 0) {
2095             cmd.dw4.macroblock_stat_enable = 0;
2096             cmd.dw5.non_first_pass_flag = 0;
2097         } else {
2098             cmd.dw4.macroblock_stat_enable = 1;
2099             cmd.dw5.non_first_pass_flag = 1;
2100             cmd.dw5.intra_mb_ipcm_flag = 1;
2101
2102         }
2103         cmd.dw5.mb_rate_ctrl_flag = 0;
2104         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2105         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2106         *data = MI_BATCH_BUFFER_END;
2107
2108         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2109     }
2110     i965_unmap_gpe_resource(gpe_resource);
2111     return;
2112 }
2113
2114 void gen8_avc_set_image_state(VADriverContextP ctx,
2115                               struct encode_state *encode_state,
2116                               struct intel_encoder_context *encoder_context,
2117                               struct i965_gpe_resource *gpe_resource)
2118 {
2119     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2120     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2121     char *pdata;
2122     int i;
2123     unsigned int * data;
2124     struct gen8_mfx_avc_img_state cmd;
2125
2126     pdata = i965_map_gpe_resource(gpe_resource);
2127
2128     if (!pdata)
2129         return;
2130
2131     gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2132     for (i = 0; i < generic_state->num_pak_passes; i++) {
2133
2134         if (i == 0) {
2135             cmd.dw4.macroblock_stat_enable = 0;
2136             cmd.dw5.non_first_pass_flag = 0;
2137         } else {
2138             cmd.dw4.macroblock_stat_enable = 1;
2139             cmd.dw5.non_first_pass_flag = 1;
2140             cmd.dw5.intra_mb_ipcm_flag = 1;
2141             cmd.dw3.inter_mb_conf_flag = 1;
2142             cmd.dw3.intra_mb_conf_flag = 1;
2143         }
2144         cmd.dw5.mb_rate_ctrl_flag = 0;
2145         memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2146         data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2147         *data = MI_BATCH_BUFFER_END;
2148
2149         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2150     }
2151     i965_unmap_gpe_resource(gpe_resource);
2152     return;
2153 }
2154
/*
 * Write a single Gen9 MFX_AVC_IMG_STATE command for the current PAK pass
 * (non-BRC/CQP path) into gpe_resource, followed by MI_BATCH_BUFFER_END.
 * Unlike gen9_avc_set_image_state(), only the command for
 * generic_state->curr_pak_pass is emitted, not one per pass.
 */
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;

    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);

    if (generic_state->curr_pak_pass == 0) {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;

    } else {
        cmd.dw4.macroblock_stat_enable = 1;
        /* NOTE(review): the BRC variant (gen9_avc_set_image_state) sets
         * non_first_pass_flag = 1 for passes > 0; here it stays 0.
         * Looks inconsistent — confirm whether this is intentional for
         * the non-BRC path. */
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
    /* Terminate the single command with a batch-buffer-end marker. */
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
    return;
}
2192
/*
 * Build the Gen9.5 trellis-quantization lambda table for the current
 * frame type.
 *
 * The table (avc_state->lamda_value_lut) holds AVC_QP_MAX rows of two
 * 32-bit entries.  Each entry packs the intra lambda in the high 16 bits
 * and the inter lambda in the low 16 bits.  After copying the per-frame-
 * type preset table, sentinel values are replaced: intra 0xfffa becomes
 * 0xf000 + default intra rounding, and inter values below
 * GEN95_AVC_MAX_LAMBDA become 0xf000 + an inter rounding picked from the
 * user override or the per-preset defaults (P, B-ref or B).
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* size is already in bytes (the sizeof(unsigned char) factor in the
     * memcpy calls below is a no-op multiplier of 1). */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            /* Intra lambda lives in the high 16 bits. */
            intra = value >> 16;

            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    /* Sentinel: substitute the default intra TQ rounding. */
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            /* Inter lambda lives in the low 16 bits. */
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                if (inter == 0xffef) {
                    /* Pick the inter rounding: user override if set,
                     * otherwise the per-preset default for P / B-ref / B.
                     * NOTE(review): when inter != 0xffef, rounding_value
                     * keeps its previous value (0 initially) — faithful
                     * to the original behavior, confirm if intended. */
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* Applied whenever inter < GEN95_AVC_MAX_LAMBDA, not only
                 * for the 0xffef sentinel. */
                inter = 0xf000 + rounding_value;
            }
            /* Repack intra (high) and inter (low) halves. */
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
2270
/*
 * Fill the BRC constant-data surface (res_brc_const_data_buffer) consumed by
 * the Gen9+ BRC kernels for the current frame type.
 *
 * The buffer is a packed sequence of fixed-size sections; the byte offsets and
 * section sizes below must match the layout the BRC kernel expects, so the
 * order of the memcpy/advance steps is significant:
 *   1. QP adjustment / distortion threshold / max-frame threshold tables
 *   2. 128-byte skip-value threshold table (P/B only)
 *   3. 256-byte reference-list QP block
 *   4. 1664-byte MV cost + mode cost table (with optional per-QP overrides)
 *   5. 128-byte ref cost table
 *   6. 64-byte intra scaling factors (+ extra lambda/FTQ25 data on KBL/GLK)
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;          /* write cursor into the mapped buffer */
    unsigned char * data_tmp = NULL;      /* secondary cursor for per-QP overrides */
    unsigned int size = 0;                /* size in bytes of the current section */
    unsigned int table_idx = 0;           /* 0/1/2 selecting the I/P/B cost tables */
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Start from an all-zero buffer so untouched sections are deterministic. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Overlay app-supplied non-FTQ skip thresholds: one value per QP, stored
     * in the odd bytes (offset 1, stride 2) of the 128-byte table above. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0 refs | 32 bytes unused | 32 bytes L1 refs | 160 bytes pad.
     * L0 and L1 slots are pre-filled with 0xff (invalid) before real indices go in. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* B frames: write L1 indices first (at offset 64), then L0 (at offset 0). */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//?
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Optionally patch byte 3 of each 16-byte per-QP entry with the legacy
     * intra mode cost. NOTE(review): the value is cast to unsigned int but
     * stored through an unsigned char pointer, so only the low byte lands
     * in the table — confirm that is the intended width. */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* App-supplied FTQ skip thresholds: replicate one value per QP into
     * bytes 24,25,27..31 of each 32-byte row (byte 26 is left untouched). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5 (KBL/GLK) kernels expect two extra sections: lambda data and FTQ25. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2427
/*
 * Legacy (Gen7.5-table) variant of gen9_avc_init_brc_const_data: fills the
 * same BRC constant-data surface but from the gen75_* cost tables and with a
 * simplified (zeroed) reference-list section.
 *
 * NOTE(review): unlike the non-_old variant, `data` here is `unsigned int *`
 * while `size` values look like byte counts (sizeof of a table, 128, 1664),
 * so every `data += size` advances size * 4 bytes and the per-QP offset
 * arithmetic strides in dwords. Verify against the consuming kernel whether
 * this dword stride is intended or a latent buffer-overrun bug.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;           /* write cursor (dword-typed; see NOTE above) */
    unsigned int * data_tmp = NULL;       /* secondary cursor for per-QP overrides */
    unsigned int size = 0;                /* size of the current section */
    unsigned int table_idx = 0;           /* 0/1/2 selecting the I/P/B cost tables */
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Start from an all-zero buffer so skipped sections are deterministic. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Overlay app-supplied non-FTQ skip thresholds, one per QP (stride 2). */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Reference-list QP section is left zeroed here; only the cursor advances. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Optionally patch element 3 of each 16-element per-QP entry with the
     * legacy intra mode cost. */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* App-supplied FTQ skip thresholds replicated into slots 24,25,27..31 of
     * each 32-element per-QP row (slot 26 untouched). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2522 static void
2523 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2524                                   struct encode_state *encode_state,
2525                                   struct i965_gpe_context *gpe_context,
2526                                   struct intel_encoder_context *encoder_context,
2527                                   void * param)
2528 {
2529     gen9_avc_brc_init_reset_curbe_data *cmd;
2530     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2531     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2532     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2533     double input_bits_per_frame = 0;
2534     double bps_ratio = 0;
2535     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2536     struct avc_param common_param;
2537
2538     cmd = i965_gpe_context_map_curbe(gpe_context);
2539
2540     if (!cmd)
2541         return;
2542
2543     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2544
2545     memset(&common_param, 0, sizeof(common_param));
2546     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2547     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2548     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2549     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2550     common_param.frames_per_100s = generic_state->frames_per_100s;
2551     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2552     common_param.target_bit_rate = generic_state->target_bit_rate;
2553
2554     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2555     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2556     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2557     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2558     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2559     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2560     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2561     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2562     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2563     cmd->dw12.no_slices = avc_state->slice_num;
2564
2565     //VUI
2566     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2567         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2568         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2569             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2570
2571         }
2572
2573     }
2574     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2575     cmd->dw7.frame_rate_d = 100;
2576     cmd->dw8.brc_flag = 0;
2577     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2578
2579
2580     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2581         //CBR
2582         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2583         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2584
2585     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2586         //VBR
2587         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2588             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2589         }
2590         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2591
2592     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2593         //AVBR
2594         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2595         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2596
2597     }
2598     //igonre icq/vcm/qvbr
2599
2600     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2601     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2602
2603     //frame bits
2604     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2605
2606     if (cmd->dw2.buf_size_in_bits == 0) {
2607         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2608     }
2609
2610     if (cmd->dw1.init_buf_full_in_bits == 0) {
2611         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2612     }
2613     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2614         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2615     }
2616     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2617         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2618     }
2619
2620     //AVBR
2621     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2622         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2623         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2624
2625     }
2626
2627     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2628     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2629
2630
2631     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2632     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2633     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2634     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2635     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2636     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2637     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2638     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2639     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2640     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2641     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2642     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2643     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2644     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2645     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2646     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2647     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2648     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2649     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2650     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2651     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2652     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2653     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2654     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2655
2656     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2657
2658     i965_gpe_context_unmap_curbe(gpe_context);
2659
2660     return;
2661 }
2662
2663 static void
2664 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2665                                      struct encode_state *encode_state,
2666                                      struct i965_gpe_context *gpe_context,
2667                                      struct intel_encoder_context *encoder_context,
2668                                      void * param_mbenc)
2669 {
2670     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2671     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2672
2673     i965_add_buffer_gpe_surface(ctx,
2674                                 gpe_context,
2675                                 &avc_ctx->res_brc_history_buffer,
2676                                 0,
2677                                 avc_ctx->res_brc_history_buffer.size,
2678                                 0,
2679                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2680
2681     i965_add_buffer_2d_gpe_surface(ctx,
2682                                    gpe_context,
2683                                    &avc_ctx->res_brc_dist_data_surface,
2684                                    1,
2685                                    I965_SURFACEFORMAT_R8_UNORM,
2686                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2687
2688     return;
2689 }
2690
2691 static VAStatus
2692 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2693                                struct encode_state *encode_state,
2694                                struct intel_encoder_context *encoder_context)
2695 {
2696     struct i965_driver_data *i965 = i965_driver_data(ctx);
2697     struct i965_gpe_table *gpe = &i965->gpe_table;
2698     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2700     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2701     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2702
2703     struct i965_gpe_context *gpe_context;
2704     struct gpe_media_object_parameter media_object_param;
2705     struct gpe_media_object_inline_data media_object_inline_data;
2706     int media_function = 0;
2707     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2708
2709     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2710
2711     if (generic_state->brc_inited)
2712         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2713
2714     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2715
2716     gpe->context_init(ctx, gpe_context);
2717     gpe->reset_binding_table(ctx, gpe_context);
2718
2719     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2720
2721     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2722
2723     gpe->setup_interface_data(ctx, gpe_context);
2724
2725     memset(&media_object_param, 0, sizeof(media_object_param));
2726     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2727     media_object_param.pinline_data = &media_object_inline_data;
2728     media_object_param.inline_size = sizeof(media_object_inline_data);
2729
2730     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2731                                      gpe_context,
2732                                      media_function,
2733                                      &media_object_param);
2734
2735     return VA_STATUS_SUCCESS;
2736 }
2737
/*
 * Populate the CURBE for the per-frame BRC update kernel.
 *
 * Also advances BRC running state: generic_state->brc_init_current_target_buf_full_in_bits
 * is the accumulated VBV target fullness and is modified here (overflow wrap,
 * skip-frame credit, per-frame increment), so the order of these updates
 * relative to the curbe writes is significant.
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    /* Need the per-surface AVC private data (reference flag, frame index). */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Begin from the canned CURBE template, then override per-frame fields. */
    memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));

    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Credit the target fullness for frames the app skipped. */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* header bytes already inserted, converted to bits */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-frame-type QP clamps; 0/0 disables clamping in the kernel. */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Advance the running VBV target by one frame's worth of input bits. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR: growth-adjustment frame thresholds and rate-ratio bands scaled by
     * the configured convergence/accuracy. */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2847
2848 static void
2849 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2850                                        struct encode_state *encode_state,
2851                                        struct i965_gpe_context *gpe_context,
2852                                        struct intel_encoder_context *encoder_context,
2853                                        void * param_brc)
2854 {
2855     struct i965_driver_data *i965 = i965_driver_data(ctx);
2856     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2857     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2858     struct brc_param * param = (struct brc_param *)param_brc ;
2859     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2860     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2861     unsigned char is_g95 = 0;
2862
2863     if (IS_SKL(i965->intel.device_info) ||
2864         IS_BXT(i965->intel.device_info) ||
2865         IS_GEN8(i965->intel.device_info))
2866         is_g95 = 0;
2867     else if (IS_KBL(i965->intel.device_info) ||
2868              IS_GLK(i965->intel.device_info))
2869         is_g95 = 1;
2870
2871     /* brc history buffer*/
2872     i965_add_buffer_gpe_surface(ctx,
2873                                 gpe_context,
2874                                 &avc_ctx->res_brc_history_buffer,
2875                                 0,
2876                                 avc_ctx->res_brc_history_buffer.size,
2877                                 0,
2878                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2879
2880     /* previous pak buffer*/
2881     i965_add_buffer_gpe_surface(ctx,
2882                                 gpe_context,
2883                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2884                                 0,
2885                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2886                                 0,
2887                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2888
2889     /* image state command buffer read only*/
2890     i965_add_buffer_gpe_surface(ctx,
2891                                 gpe_context,
2892                                 &avc_ctx->res_brc_image_state_read_buffer,
2893                                 0,
2894                                 avc_ctx->res_brc_image_state_read_buffer.size,
2895                                 0,
2896                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2897
2898     /* image state command buffer write only*/
2899     i965_add_buffer_gpe_surface(ctx,
2900                                 gpe_context,
2901                                 &avc_ctx->res_brc_image_state_write_buffer,
2902                                 0,
2903                                 avc_ctx->res_brc_image_state_write_buffer.size,
2904                                 0,
2905                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2906
2907     if (avc_state->mbenc_brc_buffer_size > 0) {
2908         i965_add_buffer_gpe_surface(ctx,
2909                                     gpe_context,
2910                                     &(avc_ctx->res_mbenc_brc_buffer),
2911                                     0,
2912                                     avc_ctx->res_mbenc_brc_buffer.size,
2913                                     0,
2914                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2915     } else {
2916         /*  Mbenc curbe input buffer */
2917         gen9_add_dri_buffer_gpe_surface(ctx,
2918                                         gpe_context,
2919                                         gpe_context_mbenc->dynamic_state.bo,
2920                                         0,
2921                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2922                                         gpe_context_mbenc->curbe.offset,
2923                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2924         /* Mbenc curbe output buffer */
2925         gen9_add_dri_buffer_gpe_surface(ctx,
2926                                         gpe_context,
2927                                         gpe_context_mbenc->dynamic_state.bo,
2928                                         0,
2929                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2930                                         gpe_context_mbenc->curbe.offset,
2931                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2932     }
2933
2934     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2935     i965_add_buffer_2d_gpe_surface(ctx,
2936                                    gpe_context,
2937                                    &avc_ctx->res_brc_dist_data_surface,
2938                                    1,
2939                                    I965_SURFACEFORMAT_R8_UNORM,
2940                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2941
2942     /* BRC const data 2D surface buffer */
2943     i965_add_buffer_2d_gpe_surface(ctx,
2944                                    gpe_context,
2945                                    &avc_ctx->res_brc_const_data_buffer,
2946                                    1,
2947                                    I965_SURFACEFORMAT_R8_UNORM,
2948                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2949
2950     /* MB statistical data surface*/
2951     if (!IS_GEN8(i965->intel.device_info)) {
2952         i965_add_buffer_gpe_surface(ctx,
2953                                     gpe_context,
2954                                     &avc_ctx->res_mb_status_buffer,
2955                                     0,
2956                                     avc_ctx->res_mb_status_buffer.size,
2957                                     0,
2958                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2959     } else {
2960         i965_add_buffer_2d_gpe_surface(ctx,
2961                                        gpe_context,
2962                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2963                                        1,
2964                                        I965_SURFACEFORMAT_R8_UNORM,
2965                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2966     }
2967     return;
2968 }
2969
/*
 * Run the frame-level BRC update kernel.
 *
 * Before launching the BRC kernel this also programs the MBEnc CURBE
 * (the BRC kernel patches/reads the MBEnc CURBE, so the two are coupled),
 * loads the BRC constant table, and constructs the MFX image state that
 * the kernel will rewrite with updated per-frame QP/rate parameters.
 *
 * Returns VA_STATUS_SUCCESS (no failure paths are reported here).
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* NOTE(review): the trailing "&& (0)" forces dirty-ROI off unconditionally;
     * presumably a deliberate disable of an unfinished feature — confirm. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    /* Flags telling the MBEnc CURBE setup which optional surfaces are bound. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Pick the MBEnc kernel variant matching the configured quality mode;
     * the I-frame variant is the base index within each group. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    /* Kernels are laid out I, P, B consecutively within each group. */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    /* Remember the MBEnc context so the BRC kernel can read/patch its CURBE,
     * then switch gpe_context over to the BRC frame-update kernel. */
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct*/
    /* Fill the read-side image state buffer; the kernel writes its updated
     * copy into the write-side buffer bound in the surface setup. */
    if (IS_GEN8(i965->intel.device_info)) {
        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    } else {
        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    }
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* Single-thread media object dispatch; the inline data is all zeros. */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
3094
3095 static void
3096 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3097                                  struct encode_state *encode_state,
3098                                  struct i965_gpe_context *gpe_context,
3099                                  struct intel_encoder_context *encoder_context,
3100                                  void * param)
3101 {
3102     gen9_avc_mb_brc_curbe_data *cmd;
3103     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3104     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3105
3106     cmd = i965_gpe_context_map_curbe(gpe_context);
3107
3108     if (!cmd)
3109         return;
3110
3111     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3112
3113     cmd->dw0.cur_frame_type = generic_state->frame_type;
3114     if (generic_state->brc_roi_enable) {
3115         cmd->dw0.enable_roi = 1;
3116     } else {
3117         cmd->dw0.enable_roi = 0;
3118     }
3119
3120     i965_gpe_context_unmap_curbe(gpe_context);
3121
3122     return;
3123 }
3124
3125 static void
3126 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3127                                     struct encode_state *encode_state,
3128                                     struct i965_gpe_context *gpe_context,
3129                                     struct intel_encoder_context *encoder_context,
3130                                     void * param_mbenc)
3131 {
3132     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3134     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3135
3136     /* brc history buffer*/
3137     i965_add_buffer_gpe_surface(ctx,
3138                                 gpe_context,
3139                                 &avc_ctx->res_brc_history_buffer,
3140                                 0,
3141                                 avc_ctx->res_brc_history_buffer.size,
3142                                 0,
3143                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3144
3145     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
3146     if (generic_state->mb_brc_enabled) {
3147         i965_add_buffer_2d_gpe_surface(ctx,
3148                                        gpe_context,
3149                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
3150                                        1,
3151                                        I965_SURFACEFORMAT_R8_UNORM,
3152                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3153
3154     }
3155
3156     /* BRC roi feature*/
3157     if (generic_state->brc_roi_enable) {
3158         i965_add_buffer_gpe_surface(ctx,
3159                                     gpe_context,
3160                                     &avc_ctx->res_mbbrc_roi_surface,
3161                                     0,
3162                                     avc_ctx->res_mbbrc_roi_surface.size,
3163                                     0,
3164                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3165
3166     }
3167
3168     /* MB statistical data surface*/
3169     i965_add_buffer_gpe_surface(ctx,
3170                                 gpe_context,
3171                                 &avc_ctx->res_mb_status_buffer,
3172                                 0,
3173                                 avc_ctx->res_mb_status_buffer.size,
3174                                 0,
3175                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
3176
3177     return;
3178 }
3179
3180 static VAStatus
3181 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3182                               struct encode_state *encode_state,
3183                               struct intel_encoder_context *encoder_context)
3184
3185 {
3186     struct i965_driver_data *i965 = i965_driver_data(ctx);
3187     struct i965_gpe_table *gpe = &i965->gpe_table;
3188     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3189     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3190     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3191     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3192
3193     struct i965_gpe_context *gpe_context;
3194     struct gpe_media_object_walker_parameter media_object_walker_param;
3195     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3196     int media_function = 0;
3197     int kernel_idx = 0;
3198
3199     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3200     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3201     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3202
3203     gpe->context_init(ctx, gpe_context);
3204     gpe->reset_binding_table(ctx, gpe_context);
3205
3206     /* set curbe brc mb update*/
3207     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3208
3209
3210     /* set surface brc mb update*/
3211     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3212
3213
3214     gpe->setup_interface_data(ctx, gpe_context);
3215
3216     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3217     /* the scaling is based on 8x8 blk level */
3218     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3219     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3220     kernel_walker_param.no_dependency = 1;
3221
3222     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3223
3224     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3225                                             gpe_context,
3226                                             media_function,
3227                                             &media_object_walker_param);
3228
3229     return VA_STATUS_SUCCESS;
3230 }
3231
3232 /*
3233 mbenc kernel related function,it include intra dist kernel
3234 */
3235 static int
3236 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3237 {
3238     int biweight = 32;      // default value
3239
3240     /* based on kernel HLD*/
3241     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3242         biweight = 32;
3243     } else {
3244         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3245
3246         if (biweight != 16 && biweight != 21 &&
3247             biweight != 32 && biweight != 43 && biweight != 48) {
3248             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
3249         }
3250     }
3251
3252     return biweight;
3253 }
3254
3255 static void
3256 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3257                                struct encode_state *encode_state,
3258                                struct intel_encoder_context *encoder_context)
3259 {
3260     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3261     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3262     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3263     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3264
3265     int max_num_references;
3266     VAPictureH264 *curr_pic;
3267     VAPictureH264 *ref_pic_l0;
3268     VAPictureH264 *ref_pic_l1;
3269     int i = 0;
3270     int tb = 0;
3271     int td = 0;
3272     int tx = 0;
3273     int tmp = 0;
3274     int poc0 = 0;
3275     int poc1 = 0;
3276
3277     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3278
3279     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3280     curr_pic = &pic_param->CurrPic;
3281     for (i = 0; i < max_num_references; i++) {
3282         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3283
3284         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3285             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3286             break;
3287         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3288         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3289             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3290             break;
3291
3292         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3293         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3294         CLIP(poc0, -128, 127);
3295         CLIP(poc1, -128, 127);
3296         tb = poc0;
3297         td = poc1;
3298
3299         if (td == 0) {
3300             td = 1;
3301         }
3302         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3303         tx = (16384 + tmp) / td ;
3304         tmp = (tb * tx + 32) >> 6;
3305         CLIP(tmp, -1024, 1023);
3306         avc_state->dist_scale_factor_list0[i] = tmp;
3307     }
3308     return;
3309 }
3310
3311 static unsigned int
3312 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3313                               VAEncSliceParameterBufferH264 *slice_param,
3314                               int list,
3315                               int ref_frame_idx)
3316 {
3317     struct i965_driver_data *i965 = i965_driver_data(ctx);
3318     struct object_surface *obj_surface;
3319     struct gen9_surface_avc *avc_priv_surface;
3320     VASurfaceID surface_id;
3321
3322     assert(slice_param);
3323     assert(list < 2);
3324
3325     if (list == 0) {
3326         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3327             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3328         else
3329             return 0;
3330     } else {
3331         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3332             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3333         else
3334             return 0;
3335     }
3336     obj_surface = SURFACE(surface_id);
3337     if (obj_surface && obj_surface->private_data) {
3338         avc_priv_surface = obj_surface->private_data;
3339         return avc_priv_surface->qp_value;
3340     } else {
3341         return 0;
3342     }
3343 }
3344
/*
 * Fill the MB-BRC constant-data buffer for the current frame type.
 *
 * The buffer is a table of 52 rows (one per QP value, 0..51), each row
 * 16 dwords wide.  A slice-type-specific base table is copied in first,
 * then individual dword columns of every row are patched in place
 * according to the active encoder features:
 *   - dword 0 (I only):  old-style intra mode cost
 *   - dword 3 (P only):  skip-bias-adjusted MV cost
 *   - dword 9 (P/B):     per-QP skip value
 *   - dword 10 (P/B):    intra scaling factor (adaptive or fixed)
 *   - dwords 6/7:        FTQ skip thresholds (byte-replicated)
 *   - dwords 11/12:      kernel trellis lambda values
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;   /* 16 dwords per row x 52 QP rows */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the base table by slice type, then copy it in whole. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    /* Keep the row origin so the second patch pass can rewind. */
    data_tmp = data;

    /* First pass: slice-type-specific per-QP patches (data steps one
     * 16-dword row per iteration). */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* Skip value: caller-provided LUT wins over the built-in tables. */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass (all slice types): FTQ thresholds and trellis lambdas. */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            /* Replicate the 8-bit threshold into the byte lanes the kernel reads. */
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        if (avc_state->kernel_trellis_enable) {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3434
3435 static void
3436 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3437                          struct encode_state *encode_state,
3438                          struct i965_gpe_context *gpe_context,
3439                          struct intel_encoder_context *encoder_context,
3440                          void * param)
3441 {
3442     struct i965_driver_data *i965 = i965_driver_data(ctx);
3443     union {
3444         gen9_avc_mbenc_curbe_data *g9;
3445         gen95_avc_mbenc_curbe_data *g95;
3446     } cmd;
3447     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3448     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3449     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3450
3451     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3452     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3453     VASurfaceID surface_id;
3454     struct object_surface *obj_surface;
3455
3456     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3457     unsigned char qp = 0;
3458     unsigned char me_method = 0;
3459     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3460     unsigned int table_idx = 0;
3461     unsigned char is_g9 = 0;
3462     unsigned char is_g95 = 0;
3463     unsigned int curbe_size = 0;
3464
3465     unsigned int preset = generic_state->preset;
3466     if (IS_SKL(i965->intel.device_info) ||
3467         IS_BXT(i965->intel.device_info)) {
3468         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3469         if (!cmd.g9)
3470             return;
3471         is_g9 = 1;
3472         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3473         memset(cmd.g9, 0, curbe_size);
3474
3475         if (mbenc_i_frame_dist_in_use) {
3476             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3477
3478         } else {
3479             switch (generic_state->frame_type) {
3480             case SLICE_TYPE_I:
3481                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3482                 break;
3483             case SLICE_TYPE_P:
3484                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3485                 break;
3486             case SLICE_TYPE_B:
3487                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3488                 break;
3489             default:
3490                 assert(0);
3491             }
3492
3493         }
3494     } else if (IS_KBL(i965->intel.device_info) ||
3495                IS_GLK(i965->intel.device_info)) {
3496         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3497         if (!cmd.g95)
3498             return;
3499         is_g95 = 1;
3500         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3501         memset(cmd.g9, 0, curbe_size);
3502
3503         if (mbenc_i_frame_dist_in_use) {
3504             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3505
3506         } else {
3507             switch (generic_state->frame_type) {
3508             case SLICE_TYPE_I:
3509                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3510                 break;
3511             case SLICE_TYPE_P:
3512                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3513                 break;
3514             case SLICE_TYPE_B:
3515                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3516                 break;
3517             default:
3518                 assert(0);
3519             }
3520
3521         }
3522     } else {
3523         /* Never get here, just silence a gcc warning */
3524         assert(0);
3525
3526         return;
3527     }
3528
3529     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3530     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3531
3532     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3533     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3534     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3535     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3536
3537     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3538     cmd.g9->dw38.max_len_sp = 0;
3539
3540     if (is_g95)
3541         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3542
3543     cmd.g9->dw3.src_access = 0;
3544     cmd.g9->dw3.ref_access = 0;
3545
3546     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3547         //disable ftq_override by now.
3548         if (avc_state->ftq_override) {
3549             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3550
3551         } else {
3552             // both gen9 and gen95 come here by now
3553             if (generic_state->frame_type == SLICE_TYPE_P) {
3554                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3555
3556             } else {
3557                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3558             }
3559         }
3560     } else {
3561         cmd.g9->dw3.ftq_enable = 0;
3562     }
3563
3564     if (avc_state->disable_sub_mb_partion)
3565         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3566
3567     if (mbenc_i_frame_dist_in_use) {
3568         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3569         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3570         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3571         cmd.g9->dw6.batch_buffer_end = 0;
3572         cmd.g9->dw31.intra_compute_type = 1;
3573
3574     } else {
3575         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3576         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3577         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3578
3579         {
3580             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3581             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3582             } else if (avc_state->skip_bias_adjustment_enable) {
3583                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3584                 // No need to check for P picture as the flag is only enabled for P picture */
3585                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3586
3587             }
3588         }
3589
3590         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3591         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3592     }
3593     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3594     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3595     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3596     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3597     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3598     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3599     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3600     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3601
3602
3603     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3604     cmd.g9->dw7.src_field_polarity = 0;//field related
3605
3606     /*ftq_skip_threshold_lut set,dw14 /15*/
3607
3608     /*r5 disable NonFTQSkipThresholdLUT*/
3609     if (generic_state->frame_type == SLICE_TYPE_P) {
3610         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3611
3612     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3613         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3614
3615     }
3616
3617     cmd.g9->dw13.qp_prime_y = qp;
3618     cmd.g9->dw13.qp_prime_cb = qp;
3619     cmd.g9->dw13.qp_prime_cr = qp;
3620     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3621
3622     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3623         switch (gen9_avc_multi_pred[preset]) {
3624         case 0:
3625             cmd.g9->dw32.mult_pred_l0_disable = 128;
3626             cmd.g9->dw32.mult_pred_l1_disable = 128;
3627             break;
3628         case 1:
3629             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3630             cmd.g9->dw32.mult_pred_l1_disable = 128;
3631             break;
3632         case 2:
3633             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3634             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3635             break;
3636         case 3:
3637             cmd.g9->dw32.mult_pred_l0_disable = 1;
3638             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3639             break;
3640
3641         }
3642
3643     } else {
3644         cmd.g9->dw32.mult_pred_l0_disable = 128;
3645         cmd.g9->dw32.mult_pred_l1_disable = 128;
3646     }
3647
3648     /*field setting for dw33 34, ignored*/
3649
3650     if (avc_state->adaptive_transform_decision_enable) {
3651         if (generic_state->frame_type != SLICE_TYPE_I) {
3652             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3653             if (is_g95) {
3654                 cmd.g95->dw60.mb_texture_threshold = 1024;
3655                 cmd.g95->dw60.tx_decision_threshold = 128;
3656             }
3657
3658         }
3659
3660         if (is_g9) {
3661             cmd.g9->dw58.mb_texture_threshold = 1024;
3662             cmd.g9->dw58.tx_decision_threshold = 128;
3663         }
3664     }
3665
3666
3667     if (generic_state->frame_type == SLICE_TYPE_B) {
3668         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3669         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3670         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3671     }
3672
3673     cmd.g9->dw34.b_original_bff = 0; //frame only
3674     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3675     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3676     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3677     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3678     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3679     if (is_g95) {
3680         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3681         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3682     }
3683
3684     if (is_g9) {
3685         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3686
3687         if (cmd.g9->dw34.force_non_skip_check) {
3688             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3689         }
3690     }
3691
3692
3693     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3694     cmd.g9->dw38.ref_threshold = 400;
3695     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3696
3697     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3698        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3699        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3700     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3701
3702     if (mbenc_i_frame_dist_in_use) {
3703         cmd.g9->dw13.qp_prime_y = 0;
3704         cmd.g9->dw13.qp_prime_cb = 0;
3705         cmd.g9->dw13.qp_prime_cr = 0;
3706         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3707         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3708         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3709
3710     }
3711     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3712         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3713         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3714         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3715         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3716         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3717         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3718         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3719         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3720         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3721         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3722     }
3723
3724     table_idx = slice_type_kernel[generic_state->frame_type];
3725     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3726
3727     if (generic_state->frame_type == SLICE_TYPE_I) {
3728         cmd.g9->dw0.skip_mode_enable = 0;
3729         cmd.g9->dw37.skip_mode_enable = 0;
3730         cmd.g9->dw36.hme_combine_overlap = 0;
3731         cmd.g9->dw47.intra_cost_sf = 16;
3732         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3733         if (is_g9)
3734             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3735
3736     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3737         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3738         cmd.g9->dw3.bme_disable_fbr = 1;
3739         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3740         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3741         cmd.g9->dw7.non_skip_zmv_added = 1;
3742         cmd.g9->dw7.non_skip_mode_added = 1;
3743         cmd.g9->dw7.skip_center_mask = 1;
3744         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3745         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3746         cmd.g9->dw36.hme_combine_overlap = 1;
3747         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3748         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3749         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3750         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3751         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3752         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3753             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3754
3755     } else {
3756         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3757         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3758         cmd.g9->dw3.search_ctrl = 7;
3759         cmd.g9->dw3.skip_type = 1;
3760         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3761         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3762         cmd.g9->dw7.skip_center_mask = 0xff;
3763         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3764         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3765         cmd.g9->dw36.hme_combine_overlap = 1;
3766         surface_id = slice_param->RefPicList1[0].picture_id;
3767         obj_surface = SURFACE(surface_id);
3768         if (!obj_surface) {
3769             WARN_ONCE("Invalid backward reference frame\n");
3770             return;
3771         }
3772         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3773
3774         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3775         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3776         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3777         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3778         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3779         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3780         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3781         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3782         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3783         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3784         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3785         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3786
3787         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3788         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3789             cmd.g9->dw7.non_skip_zmv_added = 1;
3790             cmd.g9->dw7.non_skip_mode_added = 1;
3791         }
3792
3793         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3794         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3795             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3796
3797     }
3798
3799     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3800
3801     if (avc_state->rolling_intra_refresh_enable) {
3802         /*by now disable it*/
3803         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3804         cmd.g9->dw32.mult_pred_l0_disable = 128;
3805         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3806          across one P frame to another P frame, as needed by the RollingI algo */
3807         if (is_g9) {
3808             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3809             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3810             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3811         }
3812
3813         if (is_g95) {
3814             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3815                 cmd.g95->dw4.enable_intra_refresh = 0;
3816                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3817                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3818                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3819             } else {
3820                 cmd.g95->dw4.enable_intra_refresh = 1;
3821                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3822                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3823                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3824                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3825                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3826             }
3827         }
3828
3829     } else {
3830         cmd.g9->dw34.widi_intra_refresh_en = 0;
3831     }
3832
3833     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3834     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3835
3836     /*roi set disable by now. 49-56*/
3837     if (curbe_param->roi_enabled) {
3838         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3839         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3840         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3841         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3842
3843         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3844         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3845         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3846         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3847
3848         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3849         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3850         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3851         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3852
3853         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3854         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3855         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3856         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3857
3858         if (!generic_state->brc_enabled) {
3859             char tmp = 0;
3860             tmp = generic_state->roi[0].value;
3861             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3862             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3863             tmp = generic_state->roi[1].value;
3864             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3865             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3866             tmp = generic_state->roi[2].value;
3867             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3868             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3869             tmp = generic_state->roi[3].value;
3870             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3871             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3872         } else {
3873             cmd.g9->dw34.roi_enable_flag = 0;
3874         }
3875     }
3876
3877     if (is_g95) {
3878         if (avc_state->tq_enable) {
3879             if (generic_state->frame_type == SLICE_TYPE_I) {
3880                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3881                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3882
3883             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3884                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3885                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3886
3887             } else {
3888                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3889                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3890             }
3891
3892             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3893                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3894
3895             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3896                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3897
3898             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3899                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3900
3901             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3902                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3903         }
3904     }
3905
3906     if (is_g95) {
3907         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3908         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3909         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3910         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3911         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3912         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3913         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3914         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3915         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3916         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3917         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3918         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3919         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3920         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3921         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3922         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3923         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3924         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3925         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3926         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3927         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3928         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3929     }
3930
3931     if (is_g9) {
3932         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3933         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3934         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3935         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3936         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3937         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3938         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3939         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3940         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3941         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3942         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3943         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3944         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3945         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3946         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3947         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3948         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3949         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3950         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3951         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3952         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3953         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3954     }
3955
3956     i965_gpe_context_unmap_curbe(gpe_context);
3957
3958     return;
3959 }
3960
3961 static void
3962 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3963                              struct encode_state *encode_state,
3964                              struct i965_gpe_context *gpe_context,
3965                              struct intel_encoder_context *encoder_context,
3966                              void * param)
3967 {
3968     struct i965_driver_data *i965 = i965_driver_data(ctx);
3969     gen9_avc_fei_mbenc_curbe_data *cmd;
3970     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3971     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3972     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3973     VASurfaceID surface_id;
3974     struct object_surface *obj_surface;
3975     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3976     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3977     VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
3978
3979     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3980     unsigned char qp = 0;
3981     unsigned char me_method = 0;
3982     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3983     unsigned int table_idx = 0;
3984     int ref_width, ref_height, len_sp;
3985     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
3986     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
3987     unsigned int preset = generic_state->preset;
3988
3989     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3990
3991     assert(gpe_context != NULL);
3992     cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3993     memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
3994
3995     if (mbenc_i_frame_dist_in_use) {
3996         memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
3997
3998     } else {
3999         switch (generic_state->frame_type) {
4000         case SLICE_TYPE_I:
4001             memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4002             break;
4003         case SLICE_TYPE_P:
4004             memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4005             break;
4006         case SLICE_TYPE_B:
4007             memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4008             break;
4009         default:
4010             assert(0);
4011         }
4012
4013     }
4014     /* 4 means full search, 6 means diamand search */
4015     me_method  = (fei_param->search_window == 5) ||
4016                  (fei_param->search_window == 8) ? 4 : 6;
4017
4018     ref_width    = fei_param->ref_width;
4019     ref_height   = fei_param->ref_height;
4020     len_sp       = fei_param->len_sp;
4021     /* If there is a serch_window, discard user provided ref_width, ref_height
4022      * and search_path length */
4023     switch (fei_param->search_window) {
4024     case 0:
4025         /*  not use predefined search window, there should be a search_path input */
4026         if ((fei_param->search_path != 0) &&
4027             (fei_param->search_path != 1) &&
4028             (fei_param->search_path != 2)) {
4029             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
4030             assert(0);
4031         }
4032         /* 4 means full search, 6 means diamand search */
4033         me_method = (fei_param->search_path == 1) ? 6 : 4;
4034         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4035             WARN_ONCE("Invalid input ref_width/ref_height in"
4036                       "SearchWindow=0 case! \n");
4037             assert(0);
4038         }
4039         break;
4040
4041     case 1:
4042         /* Tiny - 4 SUs 24x24 window */
4043         ref_width  = 24;
4044         ref_height = 24;
4045         len_sp     = 4;
4046         break;
4047
4048     case 2:
4049         /* Small - 9 SUs 28x28 window */
4050         ref_width  = 28;
4051         ref_height = 28;
4052         len_sp     = 9;
4053         break;
4054     case 3:
4055         /* Diamond - 16 SUs 48x40 window */
4056         ref_width  = 48;
4057         ref_height = 40;
4058         len_sp     = 16;
4059         break;
4060     case 4:
4061         /* Large Diamond - 32 SUs 48x40 window */
4062         ref_width  = 48;
4063         ref_height = 40;
4064         len_sp     = 32;
4065         break;
4066     case 5:
4067         /* Exhaustive - 48 SUs 48x40 window */
4068         ref_width  = 48;
4069         ref_height = 40;
4070         len_sp     = 48;
4071         break;
4072     case 6:
4073         /* Diamond - 16 SUs 64x32 window */
4074         ref_width  = 64;
4075         ref_height = 32;
4076         len_sp     = 16;
4077         break;
4078     case 7:
4079         /* Large Diamond - 32 SUs 64x32 window */
4080         ref_width  = 64;
4081         ref_height = 32;
4082         len_sp     = 32;
4083         break;
4084     case 8:
4085         /* Exhaustive - 48 SUs 64x32 window */
4086         ref_width  = 64;
4087         ref_height = 32;
4088         len_sp     = 48;
4089         break;
4090
4091     default:
4092         assert(0);
4093     }
4094
4095     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4096     if (is_bframe) {
4097         CLIP(ref_width, 4, 32);
4098         CLIP(ref_height, 4, 32);
4099     } else if (is_pframe) {
4100         CLIP(ref_width, 4, 64);
4101         CLIP(ref_height, 4, 32);
4102     }
4103
4104     cmd->dw0.adaptive_enable =
4105         cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4106     cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4107                                           = avc_state->transform_8x8_mode_enable;
4108     cmd->dw2.max_len_sp = len_sp;
4109     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4110     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4111     cmd->dw3.src_access =
4112         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
4113
4114     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4115         if (avc_state->ftq_override) {
4116             cmd->dw3.ft_enable = avc_state->ftq_enable;
4117         } else {
4118             if (generic_state->frame_type == SLICE_TYPE_P) {
4119                 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4120             } else {
4121                 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4122             }
4123         }
4124     } else {
4125         cmd->dw3.ft_enable = 0;
4126     }
4127
4128     if (avc_state->disable_sub_mb_partion)
4129         cmd->dw3.sub_mb_part_mask = 0x7;
4130
4131     if (mbenc_i_frame_dist_in_use) {
4132         /* Fixme: Not supported, no brc in fei */
4133         assert(0);
4134         cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4135         cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4136         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4137         cmd->dw6.batch_buffer_end = 0;
4138         cmd->dw31.intra_compute_type = 1;
4139     }
4140
4141     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4142     cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4143     cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4144                                generic_state->frame_height_in_mbs : avc_state->slice_height;
4145     cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4146     cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4147     cmd->dw3.inter_sad = fei_param->inter_sad;
4148     cmd->dw3.Intra_sad = fei_param->intra_sad;
4149     cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4150     cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4151     cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4152         avc_state->sfd_enable && generic_state->hme_enabled;
4153     cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4154     cmd->dw4.constrained_intra_pred_flag =
4155         pic_param->pic_fields.bits.constrained_intra_pred_flag;
4156     cmd->dw4.hme_enable = 0;
4157     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4158     cmd->dw4.use_actual_ref_qp_value =
4159         generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4160     cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4161     cmd->dw7.src_field_polarity = 0;
4162
4163     /* mv mode cost */
4164     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4165     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4166         // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4167     } else if (avc_state->skip_bias_adjustment_enable) {
4168         // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4169         // No need to check for P picture as the flag is only enabled for P picture
4170         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4171     }
4172
4173     //dw16
4174     /* search path tables */
4175     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4176     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4177
4178     //ftq_skip_threshold_lut set,dw14 /15
4179
4180     //r5 disable NonFTQSkipThresholdLUT
4181     if (generic_state->frame_type == SLICE_TYPE_P) {
4182         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4183     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4184         cmd->dw32.skip_val =
4185             gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4186     }
4187     cmd->dw13.qp_prime_y = qp;
4188     cmd->dw13.qp_prime_cb = qp;
4189     cmd->dw13.qp_prime_cr = qp;
4190     cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
4191
4192     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4193         cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4194         cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4195     } else {
4196         /* disable */
4197         cmd->dw32.mult_pred_l0_disable = 0x80;
4198         cmd->dw32.mult_pred_l1_disable = 0x80;
4199     }
4200     /* no field pic setting, not supported */
4201
4202     //dw34 58
4203     if (avc_state->adaptive_transform_decision_enable) {
4204         if (generic_state->frame_type != SLICE_TYPE_I) {
4205             cmd->dw34.enable_adaptive_tx_decision = 1;
4206         }
4207
4208         cmd->dw58.mb_texture_threshold = 1024;
4209         cmd->dw58.tx_decision_threshold = 128;
4210     }
4211     if (generic_state->frame_type == SLICE_TYPE_B) {
4212         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4213         cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4214         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4215     }
4216     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4217     cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4218     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4219     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4220     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4221     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4222
4223     if (cmd->dw34.force_non_skip_check) {
4224         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4225     }
4226     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4227     cmd->dw38.ref_threshold = 400;
4228     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4229     // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
4230     // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4231     // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4232     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4233     if (mbenc_i_frame_dist_in_use) {
4234         cmd->dw13.qp_prime_y = 0;
4235         cmd->dw13.qp_prime_cb = 0;
4236         cmd->dw13.qp_prime_cr = 0;
4237         cmd->dw33.intra_16x16_nondc_penalty = 0;
4238         cmd->dw33.intra_8x8_nondc_penalty = 0;
4239         cmd->dw33.intra_4x4_nondc_penalty = 0;
4240     }
4241     if (cmd->dw4.use_actual_ref_qp_value) {
4242         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4243         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4244         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4245         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4246         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4247         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4248         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4249         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4250         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4251         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4252     }
4253
4254     table_idx = slice_type_kernel[generic_state->frame_type];
4255     cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
4256
4257     if (generic_state->frame_type == SLICE_TYPE_I) {
4258         cmd->dw0.skip_mode_enable = 0;
4259         cmd->dw37.skip_mode_enable = 0;
4260         cmd->dw36.hme_combine_overlap = 0;
4261         cmd->dw36.check_all_fractional_enable = 0;
4262         cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4263         cmd->dw34.enable_direct_bias_adjustment = 0;
4264         cmd->dw34.enable_global_motion_bias_adjustment = 0;
4265
4266     } else if (generic_state->frame_type == SLICE_TYPE_P) {
4267         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4268         cmd->dw3.bme_disable_fbr = 1;
4269         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4270         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4271         cmd->dw7.non_skip_zmv_added = 1;
4272         cmd->dw7.non_skip_mode_added = 1;
4273         cmd->dw7.skip_center_mask = 1;
4274
4275         cmd->dw47.intra_cost_sf =
4276             (avc_state->adaptive_intra_scaling_enable) ?
4277             gen9_avc_adaptive_intra_scaling_factor[preset] :
4278             gen9_avc_intra_scaling_factor[preset];
4279
4280         cmd->dw47.max_vmv_r =
4281             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4282
4283         cmd->dw36.hme_combine_overlap = 1;
4284         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4285         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4286         cmd->dw34.enable_direct_bias_adjustment = 0;
4287         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4288         if (avc_state->global_motion_bias_adjustment_enable)
4289             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4290
4291         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4292
4293     } else { /* B slice */
4294
4295         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4296         cmd->dw1.bi_Weight = avc_state->bi_weight;
4297         cmd->dw3.search_ctrl = 7;
4298         cmd->dw3.skip_type = 1;
4299         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4300         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4301         cmd->dw7.skip_center_mask = 0xff;
4302
4303         cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4304                                   gen9_avc_adaptive_intra_scaling_factor[qp] :
4305                                   gen9_avc_intra_scaling_factor[qp];
4306
4307         cmd->dw47.max_vmv_r =
4308             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4309
4310         cmd->dw36.hme_combine_overlap = 1;
4311
4312         //check is_fwd_frame_short_term_ref
4313         surface_id = slice_param->RefPicList1[0].picture_id;
4314         obj_surface = SURFACE(surface_id);
4315         if (!obj_surface) {
4316             WARN_ONCE("Invalid backward reference frame\n");
4317             if (gpe_context)
4318                 i965_gpe_context_unmap_curbe(gpe_context);
4319             return;
4320         }
4321         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4322
4323         cmd->dw36.num_ref_idx_l0_minus_one =
4324             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4325             : 0;
4326         cmd->dw36.num_ref_idx_l1_minus_one =
4327             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4328             : 0;
4329         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4330
4331         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4332         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4333         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4334         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4335         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4336         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4337         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4338         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4339
4340         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4341         if (cmd->dw34.enable_direct_bias_adjustment) {
4342             cmd->dw7.non_skip_mode_added = 1;
4343             cmd->dw7.non_skip_zmv_added = 1;
4344         }
4345
4346         cmd->dw34.enable_global_motion_bias_adjustment =
4347             avc_state->global_motion_bias_adjustment_enable;
4348         if (avc_state->global_motion_bias_adjustment_enable)
4349             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4350
4351         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4352         cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4353     }
4354
4355     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4356
4357     if (avc_state->rolling_intra_refresh_enable) {
4358         //Not supported
4359         cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4360
4361     } else {
4362         cmd->dw34.widi_intra_refresh_en = 0;
4363     }
4364     cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4365     cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4366
4367     /* Fixme: Skipped ROI stuffs for now */
4368
4369     /* r64: FEI specific fields */
4370     cmd->dw64.fei_enable = 1;
4371     cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4372     if (fei_param->distortion != VA_INVALID_ID)
4373         cmd->dw64.vme_distortion_output_enable = 1;
4374     cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4375     cmd->dw64.mb_input_enable = fei_param->mb_input;
4376
4377     // FEI mode is disabled when external MVP is available
4378     if (fei_param->mv_predictor_enable)
4379         cmd->dw64.fei_mode = 0;
4380     else
4381         cmd->dw64.fei_mode = 1;
4382
4383     cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4384     cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4385     cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4386     cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4387     cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4388     cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4389     cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4390     cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4391     cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4392     cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4393     cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4394     cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4395     cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4396     cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4397     cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4398     cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4399     cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4400     cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4401     cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4402     cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4403     cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4404     cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4405     cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4406     i965_gpe_context_unmap_curbe(gpe_context);
4407
4408     return;
4409 }
4410
/*
 * Bind every input/output surface required by the Gen9 AVC MBEnc kernel
 * to the GPE context's binding table.
 *
 * The binding-table slot for each surface is fixed by the kernel ABI
 * (GEN9_AVC_MBENC_*_INDEX / GEN95_AVC_MBENC_*_INDEX), so the order and
 * indices used below must not be changed independently of the kernels.
 *
 * param_mbenc is a struct mbenc_param describing the current pass; when
 * param->mbenc_i_frame_dist_in_use is set the kernel runs on the 4x
 * downscaled reconstructed surface to produce BRC I-frame distortion
 * instead of encoding the full-resolution input.
 *
 * Returns silently (no error reporting) if a required surface or its
 * private data is missing.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    /* total number of macroblocks in the frame; used to size per-MB buffers */
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Gen9.5 (KBL/GLK) kernels use a slightly different binding-table layout
     * than Gen9 (SKL/BXT) for some slots; select the right index set below. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /*pak obj command buffer output*/
    size = frame_mb_size * 16 * 4; /* 16 DWs of PAK object data per MB */
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /*mv data buffer output*/
    size = frame_mb_size * 32 * 4; /* 32 DWs of MV data per MB */
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /*input current  YUV surface, current input Y/UV object*/
    /* I-frame-distortion pass works on the 4x downscaled reconstructed
     * surface; the normal pass uses the full-resolution input picture. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    /* luma plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    /* chroma (interleaved UV) plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /*memv input 4x*/
        /* On Gen9+ the 4x HME MV/distortion surfaces are always bound;
         * on Gen8 they are only bound for inter (P/B) frames. */
        if (!IS_GEN8(i965->intel.device_info)) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* memv distortion input*/
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        } else if (generic_state->frame_type != SLICE_TYPE_I) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* memv distortion input*/
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        }
    }

    /*mbbrc const data_buffer*/
    if (param->mb_const_data_buffer_in_use) {
        /* 16 DWs of constant BRC data per QP value */
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /*mb qp data_buffer*/
    if (param->mb_qp_buffer_in_use) {
        /* application-supplied per-MB QP map takes precedence over the
         * BRC-generated one */
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /*input current  YUV surface, current input Y/UV object*/
    /* Re-select the source picture for the VME L0 (forward) prediction
     * binding slot; same selection logic as above. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /*input ref YUV surface*/
    /* L0 reference list: each reference occupies every other slot after
     * the current picture (idx0 + i*2 + 1). Stop on the first invalid ref. */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /*input current  YUV surface, current input Y/UV object*/
    /* Source picture again for the VME L1 (backward) prediction slot. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        /* the L1 ref is also bound as an additional L0-side entry */
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            /* For B frames the kernel also reads the forward (L1[0])
             * reference's MB code and MV data, e.g. for direct/co-located
             * prediction. */
            avc_priv_surface = obj_surface->private_data;
            /*pak obj command buffer output(mb code)*/
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /*mv data buffer output*/
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer for I frame*/
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* as ref frame ,update later RefPicSelect of Current Picture*/
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }
    /* MB-stats slot: on Gen9+ prefer the video-processing MB status buffer
     * when available, otherwise fall back to the flatness-check surface;
     * Gen8 only has the flatness-check surface. Both share the same index. */
    if (!IS_GEN8(i965->intel.device_info)) {
        if (param->mb_vproc_stats_enable) {
            /*mb status buffer input*/
            size = frame_mb_size * 16 * 4;
            gpe_resource = &(avc_ctx->res_mb_status_buffer);
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_MB_STATS_INDEX);

        } else if (avc_state->flatness_check_enable) {
            gpe_resource = &(avc_ctx->res_flatness_check_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MB_STATS_INDEX);
        }
    } else if (avc_state->flatness_check_enable) {
        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if (param->mad_enable) {
        /*mad buffer input*/
        size = 4; /* single DW accumulator, zeroed before each run */
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /*arbitrary num mbs in slice*/
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /*slice surface input*/
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* Surfaces only bound for the normal encode pass (not the BRC
     * I-frame-distortion pass): the force-non-skip MB map and the static
     * frame detection (SFD) cost tables / output buffer.
     * NOTE(review): the original comment here said "BRC distortion data
     * buffer for I frame", which was a copy-paste error. */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }
        if (IS_GEN8(i965->intel.device_info)) {
            if (avc_state->sfd_enable) {
                size = 128 / sizeof(unsigned long);
                gpe_resource = &(avc_ctx->res_sfd_output_buffer);
                i965_add_buffer_gpe_surface(ctx,
                                            gpe_context,
                                            gpe_resource,
                                            0,
                                            size / 4,
                                            0,
                                            GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);

            }
        } else {
            /* SFD cost table is only meaningful for inter frames and
             * requires HME results to be available. */
            if (avc_state->sfd_enable && generic_state->hme_enabled) {
                if (generic_state->frame_type == SLICE_TYPE_P) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
                } else if (generic_state->frame_type == SLICE_TYPE_B) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
                }
                if (generic_state->frame_type != SLICE_TYPE_I) {
                    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                                   gpe_resource,
                                                   1,
                                                   I965_SURFACEFORMAT_R8_UNORM,
                                                   (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));


                }
            }
        }
    }
    return;
}
4777
4778 static void
4779 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4780                                 struct encode_state *encode_state,
4781                                 struct i965_gpe_context *gpe_context,
4782                                 struct intel_encoder_context *encoder_context,
4783                                 void * param_mbenc)
4784 {
4785     struct i965_driver_data *i965 = i965_driver_data(ctx);
4786     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4787     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4788     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4789     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4790     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4791     struct object_buffer *obj_buffer = NULL;
4792     struct buffer_store *buffer_store = NULL;
4793     struct object_surface *obj_surface = NULL;
4794     struct gen9_surface_avc *avc_priv_surface;
4795     struct i965_gpe_resource *gpe_resource;
4796     VASurfaceID surface_id;
4797     unsigned int size = 0;
4798     unsigned int frame_mb_nums;
4799     int i = 0, allocate_flag = 1;
4800
4801     obj_surface = encode_state->reconstructed_object;
4802     if (!obj_surface || !obj_surface->private_data)
4803         return;
4804     avc_priv_surface = obj_surface->private_data;
4805
4806     frame_mb_nums = generic_state->frame_width_in_mbs *
4807                     generic_state->frame_height_in_mbs;
4808     fei_param = avc_state->fei_framectl_param;
4809
4810     assert(fei_param != NULL);
4811
4812     /* res_mb_code_surface for MB code */
4813     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4814     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4815         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4816     if (fei_param->mb_code_data != VA_INVALID_ID) {
4817         obj_buffer = BUFFER(fei_param->mb_code_data);
4818         assert(obj_buffer != NULL);
4819         buffer_store = obj_buffer->buffer_store;
4820         assert(size <= buffer_store->bo->size);
4821         i965_dri_object_to_buffer_gpe_resource(
4822             &avc_priv_surface->res_mb_code_surface,
4823             buffer_store->bo);
4824     } else {
4825         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4826                                                    &avc_priv_surface->res_mb_code_surface,
4827                                                    ALIGN(size, 0x1000),
4828                                                    "mb code buffer");
4829         assert(allocate_flag != 0);
4830     }
4831
4832     /* res_mv_data_surface for MV data */
4833     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4834     if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4835         i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4836     if (fei_param->mv_data != VA_INVALID_ID) {
4837         obj_buffer = BUFFER(fei_param->mv_data);
4838         assert(obj_buffer != NULL);
4839         buffer_store = obj_buffer->buffer_store;
4840         assert(size <= buffer_store->bo->size);
4841         i965_dri_object_to_buffer_gpe_resource(
4842             &avc_priv_surface->res_mv_data_surface,
4843             buffer_store->bo);
4844     } else {
4845         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4846                                                    &avc_priv_surface->res_mv_data_surface,
4847                                                    ALIGN(size, 0x1000),
4848                                                    "mv data buffer");
4849         assert(allocate_flag != 0);
4850     }
4851
4852     /* fei mb control data surface */
4853     size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4854     if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4855         assert(fei_param->mb_ctrl != VA_INVALID_ID);
4856         obj_buffer = BUFFER(fei_param->mb_ctrl);
4857         assert(obj_buffer != NULL);
4858         buffer_store = obj_buffer->buffer_store;
4859         assert(size <= buffer_store->bo->size);
4860         if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4861             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4862         i965_dri_object_to_buffer_gpe_resource(
4863             &avc_priv_surface->res_fei_mb_cntrl_surface,
4864             buffer_store->bo);
4865     }
4866
4867     /* fei mv predictor surface*/
4868     size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4869     if (fei_param->mv_predictor_enable &&
4870         (fei_param->mv_predictor != VA_INVALID_ID)) {
4871         obj_buffer = BUFFER(fei_param->mv_predictor);
4872         assert(obj_buffer != NULL);
4873         buffer_store = obj_buffer->buffer_store;
4874         assert(size <= buffer_store->bo->size);
4875         if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4876             i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4877         i965_dri_object_to_buffer_gpe_resource(
4878             &avc_priv_surface->res_fei_mv_predictor_surface,
4879             buffer_store->bo);
4880     } else {
4881         if (fei_param->mv_predictor_enable)
4882             assert(fei_param->mv_predictor != VA_INVALID_ID);
4883     }
4884
4885     /* fei vme distortion */
4886     size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4887     if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4888         i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4889     if (fei_param->distortion != VA_INVALID_ID) {
4890         obj_buffer = BUFFER(fei_param->distortion);
4891         assert(obj_buffer != NULL);
4892         buffer_store = obj_buffer->buffer_store;
4893         assert(size <= buffer_store->bo->size);
4894         i965_dri_object_to_buffer_gpe_resource(
4895             &avc_priv_surface->res_fei_vme_distortion_surface,
4896             buffer_store->bo);
4897     } else {
4898         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4899                                                    &avc_priv_surface->res_fei_vme_distortion_surface,
4900                                                    ALIGN(size, 0x1000),
4901                                                    "fei vme distortion");
4902         assert(allocate_flag != 0);
4903     }
4904
4905     /* fei mb qp  */
4906     /* Fixme/Confirm:  not sure why we need 3 byte padding here */
4907     size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4908     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4909         obj_buffer = BUFFER(fei_param->qp);
4910         assert(obj_buffer != NULL);
4911         buffer_store = obj_buffer->buffer_store;
4912         assert((size - 3) <= buffer_store->bo->size);
4913         if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4914             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4915         i965_dri_object_to_buffer_gpe_resource(
4916             &avc_priv_surface->res_fei_mb_qp_surface,
4917             buffer_store->bo);
4918     } else {
4919         if (fei_param->mb_qp)
4920             assert(fei_param->qp != VA_INVALID_ID);
4921     }
4922
4923     /*==== pak obj command buffer output ====*/
4924     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4925     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4926     i965_add_buffer_gpe_surface(ctx,
4927                                 gpe_context,
4928                                 gpe_resource,
4929                                 0,
4930                                 size / 4,
4931                                 0,
4932                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4933
4934
4935     /*=== mv data buffer output */
4936     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4937     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4938     i965_add_buffer_gpe_surface(ctx,
4939                                 gpe_context,
4940                                 gpe_resource,
4941                                 0,
4942                                 size / 4,
4943                                 0,
4944                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4945
4946
4947     /* === current input Y (binding table offset = 3)=== */
4948     obj_surface = encode_state->input_yuv_object;
4949     i965_add_2d_gpe_surface(ctx,
4950                             gpe_context,
4951                             obj_surface,
4952                             0,
4953                             1,
4954                             I965_SURFACEFORMAT_R8_UNORM,
4955                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4956
4957     /* === current input UV === (binding table offset == 4)*/
4958     i965_add_2d_gpe_surface(ctx,
4959                             gpe_context,
4960                             obj_surface,
4961                             1,
4962                             1,
4963                             I965_SURFACEFORMAT_R16_UINT,
4964                             GEN9_AVC_MBENC_CURR_UV_INDEX);
4965
4966     /* === input current YUV surface, (binding table offset == 15) === */
4967     i965_add_adv_gpe_surface(ctx, gpe_context,
4968                              obj_surface,
4969                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
4970
4971
4972     /*== input current  YUV surface, (binding table offset == 32)*/
4973     i965_add_adv_gpe_surface(ctx, gpe_context,
4974                              obj_surface,
4975                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
4976
4977     /* list 0 references */
4978     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4979
4980         surface_id = slice_param->RefPicList0[i].picture_id;
4981         obj_surface = SURFACE(surface_id);
4982         if (!obj_surface || !obj_surface->private_data)
4983             break;
4984         i965_add_adv_gpe_surface(ctx, gpe_context,
4985                                  obj_surface,
4986                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
4987     }
4988
4989
4990     /* list 1 references */
4991     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4992         if (i > 0) break; // only  one ref supported here for B frame
4993         surface_id = slice_param->RefPicList1[i].picture_id;
4994         obj_surface = SURFACE(surface_id);
4995         if (!obj_surface || !obj_surface->private_data)
4996             break;
4997
4998         i965_add_adv_gpe_surface(ctx, gpe_context,
4999                                  obj_surface,
5000                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5001         if (i == 0) {
5002             avc_priv_surface = obj_surface->private_data;
5003             /* mb code of Backward reference frame */
5004             size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5005             gpe_resource = &avc_priv_surface->res_mb_code_surface;
5006             i965_add_buffer_gpe_surface(ctx,
5007                                         gpe_context,
5008                                         gpe_resource,
5009                                         0,
5010                                         size / 4,
5011                                         0,
5012                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5013
5014             /* mv data of backward ref frame */
5015             size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5016             gpe_resource = &avc_priv_surface->res_mv_data_surface;
5017             i965_add_buffer_gpe_surface(ctx,
5018                                         gpe_context,
5019                                         gpe_resource,
5020                                         0,
5021                                         size / 4,
5022                                         0,
5023                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5024
5025         }
5026         //again
5027         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5028             i965_add_adv_gpe_surface(ctx, gpe_context,
5029                                      obj_surface,
5030                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5031         }
5032     }
5033
5034     /* as ref frame ,update later RefPicSelect of Current Picture*/
5035     obj_surface = encode_state->reconstructed_object;
5036     avc_priv_surface = obj_surface->private_data;
5037     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5038         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5039         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5040                                        gpe_resource,
5041                                        1,
5042                                        I965_SURFACEFORMAT_R8_UNORM,
5043                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5044
5045     }
5046
5047
5048     /* mb specific data, macroblock control parameters */
5049     if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5050         (fei_param->mb_ctrl != VA_INVALID_ID)) {
5051         size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5052         gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5053         i965_add_buffer_gpe_surface(ctx,
5054                                     gpe_context,
5055                                     gpe_resource,
5056                                     0,
5057                                     size / 4,
5058                                     0,
5059                                     GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5060     }
5061
5062     /* multi mv predictor surface */
5063     if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5064         size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5065         gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5066         i965_add_buffer_gpe_surface(ctx,
5067                                     gpe_context,
5068                                     gpe_resource,
5069                                     0,
5070                                     size / 4,
5071                                     0,
5072                                     GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5073     }
5074
5075     /* mb qp */
5076     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5077         size = frame_mb_nums  + 3;
5078         gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5079         i965_add_buffer_gpe_surface(ctx,
5080                                     gpe_context,
5081                                     gpe_resource,
5082                                     0,
5083                                     size / 4,
5084                                     0,
5085                                     GEN9_AVC_MBENC_MBQP_INDEX);
5086     }
5087
5088
5089     /*=== FEI distortion surface ====*/
5090     size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5091     gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5092     i965_add_buffer_gpe_surface(ctx,
5093                                 gpe_context,
5094                                 gpe_resource,
5095                                 0,
5096                                 size / 4,
5097                                 0,
5098                                 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
5099
5100     return;
5101 }
5102
/*
 * Run one pass of the MBEnc (macroblock encoding) GPE kernel, or — when
 * i_frame_dist_in_use is set — the BRC I-frame distortion variant.
 *
 * Selects the kernel by mode/frame type, programs the curbe (unless the
 * BRC-update pass already did), binds surfaces via the context's
 * pfn_send_mbenc_surface hook, and dispatches a media-object walker
 * sized to the (possibly downscaled) frame in MBs.
 *
 * Always returns VA_STATUS_SUCCESS; invalid kernel modes / frame types
 * are caught only by assert().
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* trailing "&& (0)" deliberately keeps dirty-ROI disabled for now */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* any of these features needs the MB BRC constant data surface */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion pass: runs on the 4x-downscaled frame,
         * with MAD and BRC forced off. */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* pick the I-frame kernel for the requested quality/speed mode;
         * +1/+2 below shifts to the P/B variant of the same kernel */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* FEI uses its own kernel set regardless of kernel_mode */
        if (encoder_context->fei_enabled) {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
        }

        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* the BRC-update kernel may already have initialized this context
     * and written the curbe; don't clobber it in that case */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker pattern follows the MB dependency of the slice type:
         * 45-degree for intra, 26-degree where MBs depend on the left
         * neighbor (P, and B with spatial direct prediction) */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
5273
5274 /*
5275 me kernle related function
5276 */
5277 static void
5278 gen9_avc_set_curbe_me(VADriverContextP ctx,
5279                       struct encode_state *encode_state,
5280                       struct i965_gpe_context *gpe_context,
5281                       struct intel_encoder_context *encoder_context,
5282                       void * param)
5283 {
5284     gen9_avc_me_curbe_data *curbe_cmd;
5285     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5286     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5287     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5288
5289     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5290
5291     struct me_param * curbe_param = (struct me_param *)param ;
5292     unsigned char  use_mv_from_prev_step = 0;
5293     unsigned char write_distortions = 0;
5294     unsigned char qp_prime_y = 0;
5295     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5296     unsigned char seach_table_idx = 0;
5297     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5298     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5299     unsigned int scale_factor = 0;
5300
5301     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5302     switch (curbe_param->hme_type) {
5303     case INTEL_ENC_HME_4x : {
5304         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5305         write_distortions = 1;
5306         mv_shift_factor = 2;
5307         scale_factor = 4;
5308         prev_mv_read_pos_factor = 0;
5309         break;
5310     }
5311     case INTEL_ENC_HME_16x : {
5312         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5313         write_distortions = 0;
5314         mv_shift_factor = 2;
5315         scale_factor = 16;
5316         prev_mv_read_pos_factor = 1;
5317         break;
5318     }
5319     case INTEL_ENC_HME_32x : {
5320         use_mv_from_prev_step = 0;
5321         write_distortions = 0;
5322         mv_shift_factor = 1;
5323         scale_factor = 32;
5324         prev_mv_read_pos_factor = 0;
5325         break;
5326     }
5327     default:
5328         assert(0);
5329
5330     }
5331     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5332
5333     if (!curbe_cmd)
5334         return;
5335
5336     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5337     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5338
5339     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5340
5341     curbe_cmd->dw3.sub_pel_mode = 3;
5342     if (avc_state->field_scaling_output_interleaved) {
5343         /*frame set to zero,field specified*/
5344         curbe_cmd->dw3.src_access = 0;
5345         curbe_cmd->dw3.ref_access = 0;
5346         curbe_cmd->dw7.src_field_polarity = 0;
5347     }
5348     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5349     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5350     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5351
5352     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5353     curbe_cmd->dw6.write_distortions = write_distortions;
5354     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5355     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5356
5357     if (generic_state->frame_type == SLICE_TYPE_B) {
5358         curbe_cmd->dw1.bi_weight = 32;
5359         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5360         me_method = gen9_avc_b_me_method[generic_state->preset];
5361         seach_table_idx = 1;
5362     }
5363
5364     if (generic_state->frame_type == SLICE_TYPE_P ||
5365         generic_state->frame_type == SLICE_TYPE_B)
5366         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5367
5368     curbe_cmd->dw13.ref_streamin_cost = 5;
5369     curbe_cmd->dw13.roi_enable = 0;
5370
5371     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5372     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
5373
5374     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5375
5376     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5377     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5378     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5379     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5380     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5381     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5382     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5383
5384     i965_gpe_context_unmap_curbe(gpe_context);
5385     return;
5386 }
5387
5388 static void
5389 gen9_avc_send_surface_me(VADriverContextP ctx,
5390                          struct encode_state *encode_state,
5391                          struct i965_gpe_context *gpe_context,
5392                          struct intel_encoder_context *encoder_context,
5393                          void * param)
5394 {
5395     struct i965_driver_data *i965 = i965_driver_data(ctx);
5396
5397     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5398     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5399     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5400     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5401
5402     struct object_surface *obj_surface, *input_surface;
5403     struct gen9_surface_avc *avc_priv_surface;
5404     struct i965_gpe_resource *gpe_resource;
5405     struct me_param * curbe_param = (struct me_param *)param ;
5406
5407     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5408     VASurfaceID surface_id;
5409     int i = 0;
5410
5411     /* all scaled input surface stored in reconstructed_object*/
5412     obj_surface = encode_state->reconstructed_object;
5413     if (!obj_surface || !obj_surface->private_data)
5414         return;
5415     avc_priv_surface = obj_surface->private_data;
5416
5417
5418     switch (curbe_param->hme_type) {
5419     case INTEL_ENC_HME_4x : {
5420         /*memv output 4x*/
5421         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5422         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5423                                        gpe_resource,
5424                                        1,
5425                                        I965_SURFACEFORMAT_R8_UNORM,
5426                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5427
5428         /*memv input 16x*/
5429         if (generic_state->b16xme_enabled) {
5430             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5431             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5432                                            gpe_resource,
5433                                            1,
5434                                            I965_SURFACEFORMAT_R8_UNORM,
5435                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5436         }
5437         /* brc distortion  output*/
5438         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5439         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5440                                        gpe_resource,
5441                                        1,
5442                                        I965_SURFACEFORMAT_R8_UNORM,
5443                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5444         /* memv distortion output*/
5445         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5446         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5447                                        gpe_resource,
5448                                        1,
5449                                        I965_SURFACEFORMAT_R8_UNORM,
5450                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5451         /*input current down scaled YUV surface*/
5452         obj_surface = encode_state->reconstructed_object;
5453         avc_priv_surface = obj_surface->private_data;
5454         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5455         i965_add_adv_gpe_surface(ctx, gpe_context,
5456                                  input_surface,
5457                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5458         /*input ref scaled YUV surface*/
5459         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5460             surface_id = slice_param->RefPicList0[i].picture_id;
5461             obj_surface = SURFACE(surface_id);
5462             if (!obj_surface || !obj_surface->private_data)
5463                 break;
5464             avc_priv_surface = obj_surface->private_data;
5465
5466             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5467
5468             i965_add_adv_gpe_surface(ctx, gpe_context,
5469                                      input_surface,
5470                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5471         }
5472
5473         obj_surface = encode_state->reconstructed_object;
5474         avc_priv_surface = obj_surface->private_data;
5475         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5476
5477         i965_add_adv_gpe_surface(ctx, gpe_context,
5478                                  input_surface,
5479                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5480
5481         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5482             surface_id = slice_param->RefPicList1[i].picture_id;
5483             obj_surface = SURFACE(surface_id);
5484             if (!obj_surface || !obj_surface->private_data)
5485                 break;
5486             avc_priv_surface = obj_surface->private_data;
5487
5488             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5489
5490             i965_add_adv_gpe_surface(ctx, gpe_context,
5491                                      input_surface,
5492                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5493         }
5494         break;
5495
5496     }
5497     case INTEL_ENC_HME_16x : {
5498         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5499         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5500                                        gpe_resource,
5501                                        1,
5502                                        I965_SURFACEFORMAT_R8_UNORM,
5503                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5504
5505         if (generic_state->b32xme_enabled) {
5506             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5507             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5508                                            gpe_resource,
5509                                            1,
5510                                            I965_SURFACEFORMAT_R8_UNORM,
5511                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5512         }
5513
5514         obj_surface = encode_state->reconstructed_object;
5515         avc_priv_surface = obj_surface->private_data;
5516         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5517         i965_add_adv_gpe_surface(ctx, gpe_context,
5518                                  input_surface,
5519                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5520
5521         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5522             surface_id = slice_param->RefPicList0[i].picture_id;
5523             obj_surface = SURFACE(surface_id);
5524             if (!obj_surface || !obj_surface->private_data)
5525                 break;
5526             avc_priv_surface = obj_surface->private_data;
5527
5528             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5529
5530             i965_add_adv_gpe_surface(ctx, gpe_context,
5531                                      input_surface,
5532                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5533         }
5534
5535         obj_surface = encode_state->reconstructed_object;
5536         avc_priv_surface = obj_surface->private_data;
5537         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5538
5539         i965_add_adv_gpe_surface(ctx, gpe_context,
5540                                  input_surface,
5541                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5542
5543         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5544             surface_id = slice_param->RefPicList1[i].picture_id;
5545             obj_surface = SURFACE(surface_id);
5546             if (!obj_surface || !obj_surface->private_data)
5547                 break;
5548             avc_priv_surface = obj_surface->private_data;
5549
5550             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5551
5552             i965_add_adv_gpe_surface(ctx, gpe_context,
5553                                      input_surface,
5554                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5555         }
5556         break;
5557     }
5558     case INTEL_ENC_HME_32x : {
5559         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5560         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5561                                        gpe_resource,
5562                                        1,
5563                                        I965_SURFACEFORMAT_R8_UNORM,
5564                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5565
5566         obj_surface = encode_state->reconstructed_object;
5567         avc_priv_surface = obj_surface->private_data;
5568         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5569         i965_add_adv_gpe_surface(ctx, gpe_context,
5570                                  input_surface,
5571                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5572
5573         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5574             surface_id = slice_param->RefPicList0[i].picture_id;
5575             obj_surface = SURFACE(surface_id);
5576             if (!obj_surface || !obj_surface->private_data)
5577                 break;
5578             avc_priv_surface = obj_surface->private_data;
5579
5580             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5581
5582             i965_add_adv_gpe_surface(ctx, gpe_context,
5583                                      input_surface,
5584                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5585         }
5586
5587         obj_surface = encode_state->reconstructed_object;
5588         avc_priv_surface = obj_surface->private_data;
5589         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5590
5591         i965_add_adv_gpe_surface(ctx, gpe_context,
5592                                  input_surface,
5593                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5594
5595         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5596             surface_id = slice_param->RefPicList1[i].picture_id;
5597             obj_surface = SURFACE(surface_id);
5598             if (!obj_surface || !obj_surface->private_data)
5599                 break;
5600             avc_priv_surface = obj_surface->private_data;
5601
5602             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5603
5604             i965_add_adv_gpe_surface(ctx, gpe_context,
5605                                      input_surface,
5606                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5607         }
5608         break;
5609     }
5610     default:
5611         assert(0);
5612
5613     }
5614 }
5615
5616 static VAStatus
5617 gen9_avc_kernel_me(VADriverContextP ctx,
5618                    struct encode_state *encode_state,
5619                    struct intel_encoder_context *encoder_context,
5620                    int hme_type)
5621 {
5622     struct i965_driver_data *i965 = i965_driver_data(ctx);
5623     struct i965_gpe_table *gpe = &i965->gpe_table;
5624     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5625     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5626     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5627     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5628
5629     struct i965_gpe_context *gpe_context;
5630     struct gpe_media_object_walker_parameter media_object_walker_param;
5631     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5632     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5633     int media_function = 0;
5634     int kernel_idx = 0;
5635     struct me_param param ;
5636     unsigned int scale_factor = 0;
5637
5638     switch (hme_type) {
5639     case INTEL_ENC_HME_4x : {
5640         media_function = INTEL_MEDIA_STATE_4X_ME;
5641         scale_factor = 4;
5642         break;
5643     }
5644     case INTEL_ENC_HME_16x : {
5645         media_function = INTEL_MEDIA_STATE_16X_ME;
5646         scale_factor = 16;
5647         break;
5648     }
5649     case INTEL_ENC_HME_32x : {
5650         media_function = INTEL_MEDIA_STATE_32X_ME;
5651         scale_factor = 32;
5652         break;
5653     }
5654     default:
5655         assert(0);
5656
5657     }
5658
5659     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5660     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5661
5662     /* I frame should not come here.*/
5663     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5664     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5665
5666     gpe->context_init(ctx, gpe_context);
5667     gpe->reset_binding_table(ctx, gpe_context);
5668
5669     /*set curbe*/
5670     memset(&param, 0, sizeof(param));
5671     param.hme_type = hme_type;
5672     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5673
5674     /*send surface*/
5675     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5676
5677     gpe->setup_interface_data(ctx, gpe_context);
5678
5679     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5680     /* the scaling is based on 8x8 blk level */
5681     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5682     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5683     kernel_walker_param.no_dependency = 1;
5684
5685     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5686
5687     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5688                                             gpe_context,
5689                                             media_function,
5690                                             &media_object_walker_param);
5691
5692     return VA_STATUS_SUCCESS;
5693 }
5694
5695 /*
5696 wp related function
5697 */
5698 static void
5699 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5700                       struct encode_state *encode_state,
5701                       struct i965_gpe_context *gpe_context,
5702                       struct intel_encoder_context *encoder_context,
5703                       void * param)
5704 {
5705     gen9_avc_wp_curbe_data *cmd;
5706     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5707     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5708     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5709     struct wp_param * curbe_param = (struct wp_param *)param;
5710
5711     cmd = i965_gpe_context_map_curbe(gpe_context);
5712
5713     if (!cmd)
5714         return;
5715     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5716     if (curbe_param->ref_list_idx) {
5717         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5718         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5719     } else {
5720         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5721         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5722     }
5723
5724     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5725     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5726
5727     i965_gpe_context_unmap_curbe(gpe_context);
5728
5729 }
5730
5731 static void
5732 gen9_avc_send_surface_wp(VADriverContextP ctx,
5733                          struct encode_state *encode_state,
5734                          struct i965_gpe_context *gpe_context,
5735                          struct intel_encoder_context *encoder_context,
5736                          void * param)
5737 {
5738     struct i965_driver_data *i965 = i965_driver_data(ctx);
5739     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5740     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5741     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5742     struct wp_param * curbe_param = (struct wp_param *)param;
5743     struct object_surface *obj_surface;
5744     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5745     VASurfaceID surface_id;
5746
5747     if (curbe_param->ref_list_idx) {
5748         surface_id = slice_param->RefPicList1[0].picture_id;
5749         obj_surface = SURFACE(surface_id);
5750         if (!obj_surface || !obj_surface->private_data)
5751             avc_state->weighted_ref_l1_enable = 0;
5752         else
5753             avc_state->weighted_ref_l1_enable = 1;
5754     } else {
5755         surface_id = slice_param->RefPicList0[0].picture_id;
5756         obj_surface = SURFACE(surface_id);
5757         if (!obj_surface || !obj_surface->private_data)
5758             avc_state->weighted_ref_l0_enable = 0;
5759         else
5760             avc_state->weighted_ref_l0_enable = 1;
5761     }
5762     if (!obj_surface)
5763         obj_surface = encode_state->reference_objects[0];
5764
5765
5766     i965_add_adv_gpe_surface(ctx, gpe_context,
5767                              obj_surface,
5768                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5769
5770     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5771     i965_add_adv_gpe_surface(ctx, gpe_context,
5772                              obj_surface,
5773                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5774 }
5775
5776
5777 static VAStatus
5778 gen9_avc_kernel_wp(VADriverContextP ctx,
5779                    struct encode_state *encode_state,
5780                    struct intel_encoder_context *encoder_context,
5781                    unsigned int list1_in_use)
5782 {
5783     struct i965_driver_data *i965 = i965_driver_data(ctx);
5784     struct i965_gpe_table *gpe = &i965->gpe_table;
5785     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5786     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5787     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5788     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5789
5790     struct i965_gpe_context *gpe_context;
5791     struct gpe_media_object_walker_parameter media_object_walker_param;
5792     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5793     int media_function = INTEL_MEDIA_STATE_ENC_WP;
5794     struct wp_param param;
5795
5796     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5797
5798     gpe->context_init(ctx, gpe_context);
5799     gpe->reset_binding_table(ctx, gpe_context);
5800
5801     memset(&param, 0, sizeof(param));
5802     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5803     /*set curbe*/
5804     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5805
5806     /*send surface*/
5807     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5808
5809     gpe->setup_interface_data(ctx, gpe_context);
5810
5811     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5812     /* the scaling is based on 8x8 blk level */
5813     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5814     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5815     kernel_walker_param.no_dependency = 1;
5816
5817     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5818
5819     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5820                                             gpe_context,
5821                                             media_function,
5822                                             &media_object_walker_param);
5823
5824     return VA_STATUS_SUCCESS;
5825 }
5826
5827
5828 /*
5829 sfd related function
5830 */
/*
 * Fill the curbe (constant buffer) for the static frame detection (SFD)
 * kernel: thresholds for MV magnitude / zero-MV counts, per-QP cost table
 * for the current frame type, and the surface binding-table indices.
 */
static void
gen9_avc_set_curbe_sfd(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct i965_gpe_context *gpe_context,
                       struct intel_encoder_context *encoder_context,
                       void * param)
{
    gen9_avc_sfd_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    cmd = i965_gpe_context_map_curbe(gpe_context);

    /* Curbe mapping can fail; nothing to program in that case. */
    if (!cmd)
        return;
    memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));

    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
    cmd->dw0.stream_in_type = 7 ;
    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
    /* This is the non-VDEnc (PAK) path, so VDEnc mode is disabled. */
    cmd->dw0.vdenc_mode_disable = 1 ;

    cmd->dw1.hme_stream_in_ref_cost = 5 ;
    /* NOTE(review): this stores num_ref_idx_l0_active_minus1 as-is (i.e.
     * count - 1); confirm the kernel expects the minus-1 value here. */
    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;

    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;

    /* Static-frame detection thresholds: a frame is "static" when large
     * MVs are rare and zero MVs dominate.  Totals are percentages of the
     * frame's MB count. */
    cmd->dw3.large_mv_threshold = 128 ;
    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
    cmd->dw5.zmv_threshold = 4 ;
    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
    cmd->dw7.min_dist_threshold = 10 ;

    /* Per-QP intra cost scaling table, chosen by frame type (I frames do
     * not run SFD, so no I table). */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));

    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
    }

    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
    /* Surface binding-table slots for the kernel. */
    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;

    i965_gpe_context_unmap_curbe(gpe_context);

}
5888
5889 static void
5890 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5891                           struct encode_state *encode_state,
5892                           struct i965_gpe_context *gpe_context,
5893                           struct intel_encoder_context *encoder_context,
5894                           void * param)
5895 {
5896     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5897     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5898     struct i965_gpe_resource *gpe_resource;
5899     int size = 0;
5900
5901     /*HME mv data surface memv output 4x*/
5902     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5903     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5904                                    gpe_resource,
5905                                    1,
5906                                    I965_SURFACEFORMAT_R8_UNORM,
5907                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5908
5909     /* memv distortion */
5910     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5911     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5912                                    gpe_resource,
5913                                    1,
5914                                    I965_SURFACEFORMAT_R8_UNORM,
5915                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5916     /*buffer output*/
5917     size = 32 * 4 * 4;
5918     gpe_resource = &avc_ctx->res_sfd_output_buffer;
5919     i965_add_buffer_gpe_surface(ctx,
5920                                 gpe_context,
5921                                 gpe_resource,
5922                                 0,
5923                                 size / 4,
5924                                 0,
5925                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5926
5927 }
5928
5929 static VAStatus
5930 gen9_avc_kernel_sfd(VADriverContextP ctx,
5931                     struct encode_state *encode_state,
5932                     struct intel_encoder_context *encoder_context)
5933 {
5934     struct i965_driver_data *i965 = i965_driver_data(ctx);
5935     struct i965_gpe_table *gpe = &i965->gpe_table;
5936     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5937     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5938     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5939
5940     struct i965_gpe_context *gpe_context;
5941     struct gpe_media_object_parameter media_object_param;
5942     struct gpe_media_object_inline_data media_object_inline_data;
5943     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5944     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5945
5946     gpe->context_init(ctx, gpe_context);
5947     gpe->reset_binding_table(ctx, gpe_context);
5948
5949     /*set curbe*/
5950     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5951
5952     /*send surface*/
5953     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5954
5955     gpe->setup_interface_data(ctx, gpe_context);
5956
5957     memset(&media_object_param, 0, sizeof(media_object_param));
5958     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5959     media_object_param.pinline_data = &media_object_inline_data;
5960     media_object_param.inline_size = sizeof(media_object_inline_data);
5961
5962     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5963                                      gpe_context,
5964                                      media_function,
5965                                      &media_object_param);
5966
5967     return VA_STATUS_SUCCESS;
5968 }
5969
5970 static void
5971 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
5972                          struct encode_state *encode_state,
5973                          struct i965_gpe_context *gpe_context,
5974                          struct intel_encoder_context *encoder_context,
5975                          void * param)
5976 {
5977     struct i965_driver_data *i965 = i965_driver_data(ctx);
5978     gen8_avc_mbenc_curbe_data *cmd;
5979     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5980     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5981     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5982
5983     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5984     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
5985     VASurfaceID surface_id;
5986     struct object_surface *obj_surface;
5987
5988     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
5989     unsigned char qp = 0;
5990     unsigned char me_method = 0;
5991     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
5992     unsigned int table_idx = 0;
5993     unsigned int curbe_size = 0;
5994
5995     unsigned int preset = generic_state->preset;
5996     if (IS_GEN8(i965->intel.device_info)) {
5997         cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
5998         if (!cmd)
5999             return;
6000         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6001         memset(cmd, 0, curbe_size);
6002
6003         if (mbenc_i_frame_dist_in_use) {
6004             memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6005         } else {
6006             switch (generic_state->frame_type) {
6007             case SLICE_TYPE_I:
6008                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6009                 break;
6010             case SLICE_TYPE_P:
6011                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6012                 break;
6013             case SLICE_TYPE_B:
6014                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
6015                 break;
6016             default:
6017                 assert(0);
6018             }
6019         }
6020     } else {
6021         assert(0);
6022
6023         return;
6024     }
6025
6026     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
6027     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6028
6029     cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6030     cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6031     cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6032     cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6033
6034     cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6035     cmd->dw38.max_len_sp = 0;
6036
6037     cmd->dw3.src_access = 0;
6038     cmd->dw3.ref_access = 0;
6039
6040     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6041         //disable ftq_override by now.
6042         if (avc_state->ftq_override) {
6043             cmd->dw3.ftq_enable = avc_state->ftq_enable;
6044
6045         } else {
6046             if (generic_state->frame_type == SLICE_TYPE_P) {
6047                 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6048
6049             } else {
6050                 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6051             }
6052         }
6053     } else {
6054         cmd->dw3.ftq_enable = 0;
6055     }
6056
6057     if (avc_state->disable_sub_mb_partion)
6058         cmd->dw3.sub_mb_part_mask = 0x7;
6059
6060     if (mbenc_i_frame_dist_in_use) {
6061         cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6062         cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6063         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6064         cmd->dw6.batch_buffer_end = 0;
6065         cmd->dw31.intra_compute_type = 1;
6066     } else {
6067         cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6068         cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6069         cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
6070
6071         {
6072             memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
6073             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6074             } else if (avc_state->skip_bias_adjustment_enable) {
6075                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6076                 // No need to check for P picture as the flag is only enabled for P picture */
6077                 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
6078             }
6079         }
6080         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6081         memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6082     }
6083     cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6084     cmd->dw4.field_parity_flag = 0;//bottom field
6085     cmd->dw4.enable_cur_fld_idr = 0;//field realted
6086     cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6087     cmd->dw4.hme_enable = generic_state->hme_enabled;
6088     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6089     cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
6090
6091     cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6092     cmd->dw7.src_field_polarity = 0;//field related
6093
6094     /*ftq_skip_threshold_lut set,dw14 /15*/
6095
6096     /*r5 disable NonFTQSkipThresholdLUT*/
6097     if (generic_state->frame_type == SLICE_TYPE_P) {
6098         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6099     } else if (generic_state->frame_type == SLICE_TYPE_B) {
6100         cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6101     }
6102
6103     cmd->dw13.qp_prime_y = qp;
6104     cmd->dw13.qp_prime_cb = qp;
6105     cmd->dw13.qp_prime_cr = qp;
6106     cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
6107
6108     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6109         switch (gen9_avc_multi_pred[preset]) {
6110         case 0:
6111             cmd->dw32.mult_pred_l0_disable = 128;
6112             cmd->dw32.mult_pred_l1_disable = 128;
6113             break;
6114         case 1:
6115             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6116             cmd->dw32.mult_pred_l1_disable = 128;
6117             break;
6118         case 2:
6119             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6120             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6121             break;
6122         case 3:
6123             cmd->dw32.mult_pred_l0_disable = 1;
6124             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6125             break;
6126         }
6127
6128     } else {
6129         cmd->dw32.mult_pred_l0_disable = 128;
6130         cmd->dw32.mult_pred_l1_disable = 128;
6131     }
6132
6133     if (generic_state->frame_type == SLICE_TYPE_B) {
6134         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
6135         cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6136         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6137     }
6138
6139     cmd->dw34.b_original_bff = 0; //frame only
6140     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6141     cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6142     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6143     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6144     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6145     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6146
6147     if (cmd->dw34.force_non_skip_check) {
6148         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6149     }
6150
6151     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6152     cmd->dw38.ref_threshold = 400;
6153     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6154     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
6155
6156     if (mbenc_i_frame_dist_in_use) {
6157         cmd->dw13.qp_prime_y = 0;
6158         cmd->dw13.qp_prime_cb = 0;
6159         cmd->dw13.qp_prime_cr = 0;
6160         cmd->dw33.intra_16x16_nondc_penalty = 0;
6161         cmd->dw33.intra_8x8_nondc_penalty = 0;
6162         cmd->dw33.intra_4x4_nondc_penalty = 0;
6163     }
6164     if (cmd->dw4.use_actual_ref_qp_value) {
6165         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6166         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6167         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6168         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6169         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6170         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6171         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6172         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6173         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6174         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6175     }
6176
6177     table_idx = slice_type_kernel[generic_state->frame_type];
6178     cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
6179     if (generic_state->frame_type == SLICE_TYPE_I) {
6180         cmd->dw0.skip_mode_enable = 0;
6181         cmd->dw37.skip_mode_enable = 0;
6182         cmd->dw36.hme_combine_overlap = 0;
6183         cmd->dw47.intra_cost_sf = 16;
6184         cmd->dw34.enable_direct_bias_adjustment = 0;
6185         cmd->dw34.enable_global_motion_bias_adjustment = 0;
6186
6187     } else if (generic_state->frame_type == SLICE_TYPE_P) {
6188         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
6189         cmd->dw3.bme_disable_fbr = 1;
6190         cmd->dw5.ref_width = gen9_avc_search_x[preset];
6191         cmd->dw5.ref_height = gen9_avc_search_y[preset];
6192         cmd->dw7.non_skip_zmv_added = 1;
6193         cmd->dw7.non_skip_mode_added = 1;
6194         cmd->dw7.skip_center_mask = 1;
6195         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
6196         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
6197         cmd->dw36.hme_combine_overlap = 1;
6198         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
6199         cmd->dw39.ref_width = gen9_avc_search_x[preset];
6200         cmd->dw39.ref_height = gen9_avc_search_y[preset];
6201         cmd->dw34.enable_direct_bias_adjustment = 0;
6202         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
6203         if (avc_state->global_motion_bias_adjustment_enable)
6204             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
6205     } else {
6206         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
6207         cmd->dw1.bi_weight = avc_state->bi_weight;
6208         cmd->dw3.search_ctrl = 7;
6209         cmd->dw3.skip_type = 1;
6210         cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
6211         cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
6212         cmd->dw7.skip_center_mask = 0xff;
6213         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
6214         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
6215         cmd->dw36.hme_combine_overlap = 1;
6216         surface_id = slice_param->RefPicList1[0].picture_id;
6217         obj_surface = SURFACE(surface_id);
6218         if (!obj_surface) {
6219             WARN_ONCE("Invalid backward reference frame\n");
6220             return;
6221         }
6222         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
6223         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
6224         cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
6225         cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
6226         cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
6227         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
6228         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
6229         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
6230         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
6231         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
6232         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
6233         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
6234         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
6235         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
6236         if (cmd->dw34.enable_direct_bias_adjustment) {
6237             cmd->dw7.non_skip_zmv_added = 1;
6238             cmd->dw7.non_skip_mode_added = 1;
6239         }
6240
6241         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
6242         if (avc_state->global_motion_bias_adjustment_enable)
6243             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
6244     }
6245     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
6246
6247     if (avc_state->rolling_intra_refresh_enable) {
6248         /*by now disable it*/
6249         if (generic_state->brc_enabled) {
6250             cmd->dw4.enable_intra_refresh = false;
6251             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
6252             cmd->dw48.widi_intra_refresh_mbx = 0;
6253             cmd->dw58.widi_intra_refresh_mby = 0;
6254         } else {
6255             cmd->dw4.enable_intra_refresh = true;
6256             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
6257         }
6258         cmd->dw32.mult_pred_l0_disable = 128;
6259         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
6260          across one P frame to another P frame, as needed by the RollingI algo */
6261         cmd->dw48.widi_intra_refresh_mbx = 0;
6262         cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
6263         cmd->dw48.widi_intra_refresh_qp_delta = 0;
6264
6265     } else {
6266         cmd->dw34.widi_intra_refresh_en = 0;
6267     }
6268
6269     /*roi set disable by now. 49-56*/
6270     if (curbe_param->roi_enabled) {
6271         cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
6272         cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
6273         cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
6274         cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
6275
6276         cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
6277         cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
6278         cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
6279         cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
6280
6281         cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
6282         cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
6283         cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
6284         cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
6285
6286         cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
6287         cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
6288         cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
6289         cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
6290
6291         cmd->dw36.enable_cabac_work_around = 0;
6292
6293         if (!generic_state->brc_enabled) {
6294             char tmp = 0;
6295             tmp = generic_state->roi[0].value;
6296             CLIP(tmp, -qp, AVC_QP_MAX - qp);
6297             cmd->dw57.roi_1_dqp_prime_y = tmp;
6298             tmp = generic_state->roi[1].value;
6299             CLIP(tmp, -qp, AVC_QP_MAX - qp);
6300             cmd->dw57.roi_2_dqp_prime_y = tmp;
6301             tmp = generic_state->roi[2].value;
6302             CLIP(tmp, -qp, AVC_QP_MAX - qp);
6303             cmd->dw57.roi_3_dqp_prime_y = tmp;
6304             tmp = generic_state->roi[3].value;
6305             CLIP(tmp, -qp, AVC_QP_MAX - qp);
6306             cmd->dw57.roi_4_dqp_prime_y = tmp;
6307         } else {
6308             cmd->dw34.roi_enable_flag = 0;
6309         }
6310     }
6311
6312     cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
6313     cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
6314     cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
6315     cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
6316     cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
6317     cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
6318     cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
6319     cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
6320     cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
6321     cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
6322     cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
6323     cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
6324     cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
6325     cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
6326     cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
6327     cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
6328     cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
6329     cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
6330     cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
6331     cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
6332     cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
6333     cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
6334
6335     i965_gpe_context_unmap_curbe(gpe_context);
6336
6337     return;
6338 }
6339
6340 static void
6341 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
6342                              struct encode_state *encode_state,
6343                              struct i965_gpe_context *gpe_context,
6344                              struct intel_encoder_context *encoder_context,
6345                              void *param)
6346 {
6347     gen8_avc_scaling4x_curbe_data *curbe_cmd;
6348     struct scaling_param *surface_param = (struct scaling_param *)param;
6349
6350     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
6351
6352     if (!curbe_cmd)
6353         return;
6354
6355     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
6356
6357     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
6358     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
6359
6360     curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
6361     curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
6362
6363     curbe_cmd->dw5.flatness_threshold = 0;
6364     if (surface_param->enable_mb_flatness_check) {
6365         curbe_cmd->dw5.flatness_threshold = 128;
6366         curbe_cmd->dw8.flatness_output_bti_top_field = 4;
6367     }
6368
6369     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
6370     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
6371     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
6372
6373     if (curbe_cmd->dw6.enable_mb_variance_output ||
6374         curbe_cmd->dw6.enable_mb_pixel_average_output) {
6375         curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
6376         curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
6377     }
6378
6379     i965_gpe_context_unmap_curbe(gpe_context);
6380     return;
6381 }
6382
6383 static void
6384 gen8_avc_set_curbe_me(VADriverContextP ctx,
6385                       struct encode_state *encode_state,
6386                       struct i965_gpe_context *gpe_context,
6387                       struct intel_encoder_context *encoder_context,
6388                       void * param)
6389 {
6390     gen8_avc_me_curbe_data *curbe_cmd;
6391     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6392     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6393     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6394
6395     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6396
6397     struct me_param * curbe_param = (struct me_param *)param ;
6398     unsigned char  use_mv_from_prev_step = 0;
6399     unsigned char write_distortions = 0;
6400     unsigned char qp_prime_y = 0;
6401     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6402     unsigned char seach_table_idx = 0;
6403     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6404     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6405     unsigned int scale_factor = 0;
6406
6407     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
6408     switch (curbe_param->hme_type) {
6409     case INTEL_ENC_HME_4x : {
6410         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
6411         write_distortions = 1;
6412         mv_shift_factor = 2;
6413         scale_factor = 4;
6414         prev_mv_read_pos_factor = 0;
6415         break;
6416     }
6417     case INTEL_ENC_HME_16x : {
6418         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
6419         write_distortions = 0;
6420         mv_shift_factor = 2;
6421         scale_factor = 16;
6422         prev_mv_read_pos_factor = 1;
6423         break;
6424     }
6425     case INTEL_ENC_HME_32x : {
6426         use_mv_from_prev_step = 0;
6427         write_distortions = 0;
6428         mv_shift_factor = 1;
6429         scale_factor = 32;
6430         prev_mv_read_pos_factor = 0;
6431         break;
6432     }
6433     default:
6434         assert(0);
6435
6436     }
6437     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
6438
6439     if (!curbe_cmd)
6440         return;
6441
6442     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6443     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6444
6445     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
6446
6447     curbe_cmd->dw3.sub_pel_mode = 3;
6448     if (avc_state->field_scaling_output_interleaved) {
6449         /*frame set to zero,field specified*/
6450         curbe_cmd->dw3.src_access = 0;
6451         curbe_cmd->dw3.ref_access = 0;
6452         curbe_cmd->dw7.src_field_polarity = 0;
6453     }
6454     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6455     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6456     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
6457
6458     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6459     curbe_cmd->dw6.write_distortions = write_distortions;
6460     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6461     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
6462
6463     if (generic_state->frame_type == SLICE_TYPE_B) {
6464         curbe_cmd->dw1.bi_weight = 32;
6465         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
6466         me_method = gen9_avc_b_me_method[generic_state->preset];
6467         seach_table_idx = 1;
6468     }
6469
6470     if (generic_state->frame_type == SLICE_TYPE_P ||
6471         generic_state->frame_type == SLICE_TYPE_B)
6472         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
6473
6474     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6475     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6476
6477     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
6478
6479     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
6480     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
6481     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
6482     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
6483     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
6484     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
6485     curbe_cmd->dw38.reserved = 0;
6486
6487     i965_gpe_context_unmap_curbe(gpe_context);
6488     return;
6489 }
6490
6491 static void
6492 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
6493                                     struct encode_state *encode_state,
6494                                     struct i965_gpe_context *gpe_context,
6495                                     struct intel_encoder_context *encoder_context,
6496                                     void * param)
6497 {
6498     gen8_avc_frame_brc_update_curbe_data *cmd;
6499     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6500     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6501     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6502     struct object_surface *obj_surface;
6503     struct gen9_surface_avc *avc_priv_surface;
6504     struct avc_param common_param;
6505
6506     obj_surface = encode_state->reconstructed_object;
6507
6508     if (!obj_surface || !obj_surface->private_data)
6509         return;
6510     avc_priv_surface = obj_surface->private_data;
6511
6512     cmd = i965_gpe_context_map_curbe(gpe_context);
6513
6514     if (!cmd)
6515         return;
6516
6517     memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
6518
6519     cmd->dw5.target_size_flag = 0 ;
6520     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
6521         /*overflow*/
6522         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
6523         cmd->dw5.target_size_flag = 1 ;
6524     }
6525
6526     if (generic_state->skip_frame_enbale) {
6527         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
6528         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
6529
6530         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
6531
6532     }
6533     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
6534     cmd->dw1.frame_number = generic_state->seq_frame_number ;
6535     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
6536     cmd->dw5.cur_frame_type = generic_state->frame_type ;
6537     cmd->dw5.brc_flag = 0 ;
6538     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
6539
6540     if (avc_state->multi_pre_enable) {
6541         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
6542         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
6543     }
6544
6545     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
6546     if (avc_state->min_max_qp_enable) {
6547         switch (generic_state->frame_type) {
6548         case SLICE_TYPE_I:
6549             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
6550             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
6551             break;
6552         case SLICE_TYPE_P:
6553             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
6554             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
6555             break;
6556         case SLICE_TYPE_B:
6557             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
6558             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
6559             break;
6560         }
6561     } else {
6562         cmd->dw6.minimum_qp = 0 ;
6563         cmd->dw6.maximum_qp = 0 ;
6564     }
6565
6566     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
6567
6568     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
6569         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
6570         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
6571         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
6572         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
6573         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
6574         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
6575         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
6576         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
6577         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
6578         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
6579
6580     }
6581
6582     memset(&common_param, 0, sizeof(common_param));
6583     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
6584     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
6585     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
6586     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
6587     common_param.frames_per_100s = generic_state->frames_per_100s;
6588     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
6589     common_param.target_bit_rate = generic_state->target_bit_rate;
6590
6591     i965_gpe_context_unmap_curbe(gpe_context);
6592
6593     return;
6594 }
6595
6596 /*
6597 kernel related function:init/destroy etc
6598 */
6599 static void
6600 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
6601                              struct generic_encoder_context *generic_context,
6602                              struct gen_avc_scaling_context *kernel_context)
6603 {
6604     struct i965_driver_data *i965 = i965_driver_data(ctx);
6605     struct i965_gpe_table *gpe = &i965->gpe_table;
6606     struct i965_gpe_context *gpe_context = NULL;
6607     struct encoder_kernel_parameter kernel_param ;
6608     struct encoder_scoreboard_parameter scoreboard_param;
6609     struct i965_kernel common_kernel;
6610
6611     memset(&kernel_param, 0, sizeof(kernel_param));
6612     if (IS_SKL(i965->intel.device_info) ||
6613         IS_BXT(i965->intel.device_info)) {
6614         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
6615         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
6616     } else if (IS_KBL(i965->intel.device_info) ||
6617                IS_GLK(i965->intel.device_info)) {
6618         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
6619         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
6620     } else if (IS_GEN8(i965->intel.device_info)) {
6621         kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
6622         kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
6623     }
6624     else
6625         assert(0);
6626
6627     /* 4x scaling kernel*/
6628     kernel_param.sampler_size = 0;
6629
6630     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6631     scoreboard_param.mask = 0xFF;
6632     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6633     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6634     scoreboard_param.walkpat_flag = 0;
6635
6636     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
6637     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6638     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6639
6640     memset(&common_kernel, 0, sizeof(common_kernel));
6641
6642     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6643                                          generic_context->enc_kernel_size,
6644                                          INTEL_GENERIC_ENC_SCALING4X,
6645                                          0,
6646                                          &common_kernel);
6647
6648     gpe->load_kernels(ctx,
6649                       gpe_context,
6650                       &common_kernel,
6651                       1);
6652
6653     /*2x scaling kernel*/
6654     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
6655     kernel_param.inline_data_size = 0;
6656     kernel_param.sampler_size = 0;
6657
6658     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
6659     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6660     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6661
6662     memset(&common_kernel, 0, sizeof(common_kernel));
6663
6664     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6665                                          generic_context->enc_kernel_size,
6666                                          INTEL_GENERIC_ENC_SCALING2X,
6667                                          0,
6668                                          &common_kernel);
6669
6670     gpe->load_kernels(ctx,
6671                       gpe_context,
6672                       &common_kernel,
6673                       1);
6674
6675 }
6676
6677 static void
6678 gen9_avc_kernel_init_me(VADriverContextP ctx,
6679                         struct generic_encoder_context *generic_context,
6680                         struct gen_avc_me_context *kernel_context)
6681 {
6682     struct i965_driver_data *i965 = i965_driver_data(ctx);
6683     struct i965_gpe_table *gpe = &i965->gpe_table;
6684     struct i965_gpe_context *gpe_context = NULL;
6685     struct encoder_kernel_parameter kernel_param ;
6686     struct encoder_scoreboard_parameter scoreboard_param;
6687     struct i965_kernel common_kernel;
6688     int i = 0;
6689
6690     if (IS_GEN8(i965->intel.device_info)) {
6691         kernel_param.curbe_size = sizeof(gen8_avc_me_curbe_data);
6692     } else {
6693         kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
6694     }
6695     kernel_param.inline_data_size = 0;
6696     kernel_param.sampler_size = 0;
6697
6698     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6699     scoreboard_param.mask = 0xFF;
6700     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6701     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6702     scoreboard_param.walkpat_flag = 0;
6703
6704     for (i = 0; i < 2; i++) {
6705         gpe_context = &kernel_context->gpe_contexts[i];
6706         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6707         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6708
6709         memset(&common_kernel, 0, sizeof(common_kernel));
6710
6711         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6712                                              generic_context->enc_kernel_size,
6713                                              INTEL_GENERIC_ENC_ME,
6714                                              i,
6715                                              &common_kernel);
6716
6717         gpe->load_kernels(ctx,
6718                           gpe_context,
6719                           &common_kernel,
6720                           1);
6721     }
6722
6723 }
6724
6725 static void
6726 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
6727                            struct generic_encoder_context *generic_context,
6728                            struct gen_avc_mbenc_context *kernel_context,
6729                            int fei_enabled)
6730 {
6731     struct i965_driver_data *i965 = i965_driver_data(ctx);
6732     struct i965_gpe_table *gpe = &i965->gpe_table;
6733     struct i965_gpe_context *gpe_context = NULL;
6734     struct encoder_kernel_parameter kernel_param ;
6735     struct encoder_scoreboard_parameter scoreboard_param;
6736     struct i965_kernel common_kernel;
6737     int i = 0;
6738     unsigned int curbe_size = 0;
6739     unsigned int num_mbenc_kernels = 0;
6740
6741     if (IS_SKL(i965->intel.device_info) ||
6742         IS_BXT(i965->intel.device_info)) {
6743         if (!fei_enabled) {
6744             curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
6745             num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
6746         } else {
6747             curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
6748             num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
6749         }
6750     } else if (IS_KBL(i965->intel.device_info) ||
6751                IS_GLK(i965->intel.device_info)) {
6752         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
6753         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
6754     } else if (IS_GEN8(i965->intel.device_info)) {
6755         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6756         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
6757     }
6758
6759     assert(curbe_size > 0);
6760     kernel_param.curbe_size = curbe_size;
6761     kernel_param.inline_data_size = 0;
6762     kernel_param.sampler_size = 0;
6763
6764     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6765     scoreboard_param.mask = 0xFF;
6766     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6767     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6768     scoreboard_param.walkpat_flag = 0;
6769
6770     for (i = 0; i < num_mbenc_kernels ; i++) {
6771         gpe_context = &kernel_context->gpe_contexts[i];
6772         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6773         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6774
6775         memset(&common_kernel, 0, sizeof(common_kernel));
6776
6777         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6778                                                     generic_context->enc_kernel_size,
6779                                                     INTEL_GENERIC_ENC_MBENC,
6780                                                     i,
6781                                                     &common_kernel);
6782
6783         gpe->load_kernels(ctx,
6784                           gpe_context,
6785                           &common_kernel,
6786                           1);
6787     }
6788
6789 }
6790
6791 static void
6792 gen9_avc_kernel_init_brc(VADriverContextP ctx,
6793                          struct generic_encoder_context *generic_context,
6794                          struct gen_avc_brc_context *kernel_context)
6795 {
6796     struct i965_driver_data *i965 = i965_driver_data(ctx);
6797     struct i965_gpe_table *gpe = &i965->gpe_table;
6798     struct i965_gpe_context *gpe_context = NULL;
6799     struct encoder_kernel_parameter kernel_param ;
6800     struct encoder_scoreboard_parameter scoreboard_param;
6801     struct i965_kernel common_kernel;
6802     int num_brc_init_kernels = 0;
6803     int i = 0;
6804
6805     if (IS_GEN8(i965->intel.device_info)) {
6806         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
6807     } else {
6808         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
6809     }
6810
6811     const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
6812         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
6813         (sizeof(gen8_avc_frame_brc_update_curbe_data)),
6814         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
6815         (sizeof(gen8_avc_mbenc_curbe_data)),
6816         0,
6817     };
6818     const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
6819         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
6820         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
6821         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
6822         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
6823         0,
6824         (sizeof(gen9_avc_mb_brc_curbe_data))
6825     };
6826
6827     kernel_param.inline_data_size = 0;
6828     kernel_param.sampler_size = 0;
6829
6830     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6831     scoreboard_param.mask = 0xFF;
6832     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6833     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6834     scoreboard_param.walkpat_flag = 0;
6835
6836     for (i = 0; i < num_brc_init_kernels; i++) {
6837         if (IS_GEN8(i965->intel.device_info)) {
6838             kernel_param.curbe_size = gen8_brc_curbe_size[i];
6839         } else {
6840             kernel_param.curbe_size = gen9_brc_curbe_size[i];
6841         }
6842         gpe_context = &kernel_context->gpe_contexts[i];
6843         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6844         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6845
6846         memset(&common_kernel, 0, sizeof(common_kernel));
6847
6848         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6849                                              generic_context->enc_kernel_size,
6850                                              INTEL_GENERIC_ENC_BRC,
6851                                              i,
6852                                              &common_kernel);
6853
6854         gpe->load_kernels(ctx,
6855                           gpe_context,
6856                           &common_kernel,
6857                           1);
6858     }
6859
6860 }
6861
6862 static void
6863 gen9_avc_kernel_init_wp(VADriverContextP ctx,
6864                         struct generic_encoder_context *generic_context,
6865                         struct gen_avc_wp_context *kernel_context)
6866 {
6867     struct i965_driver_data *i965 = i965_driver_data(ctx);
6868     struct i965_gpe_table *gpe = &i965->gpe_table;
6869     struct i965_gpe_context *gpe_context = NULL;
6870     struct encoder_kernel_parameter kernel_param ;
6871     struct encoder_scoreboard_parameter scoreboard_param;
6872     struct i965_kernel common_kernel;
6873
6874     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
6875     kernel_param.inline_data_size = 0;
6876     kernel_param.sampler_size = 0;
6877
6878     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6879     scoreboard_param.mask = 0xFF;
6880     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6881     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6882     scoreboard_param.walkpat_flag = 0;
6883
6884     gpe_context = &kernel_context->gpe_contexts;
6885     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6886     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6887
6888     memset(&common_kernel, 0, sizeof(common_kernel));
6889
6890     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6891                                          generic_context->enc_kernel_size,
6892                                          INTEL_GENERIC_ENC_WP,
6893                                          0,
6894                                          &common_kernel);
6895
6896     gpe->load_kernels(ctx,
6897                       gpe_context,
6898                       &common_kernel,
6899                       1);
6900
6901 }
6902
6903 static void
6904 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
6905                          struct generic_encoder_context *generic_context,
6906                          struct gen_avc_sfd_context *kernel_context)
6907 {
6908     struct i965_driver_data *i965 = i965_driver_data(ctx);
6909     struct i965_gpe_table *gpe = &i965->gpe_table;
6910     struct i965_gpe_context *gpe_context = NULL;
6911     struct encoder_kernel_parameter kernel_param ;
6912     struct encoder_scoreboard_parameter scoreboard_param;
6913     struct i965_kernel common_kernel;
6914
6915     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
6916     kernel_param.inline_data_size = 0;
6917     kernel_param.sampler_size = 0;
6918
6919     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
6920     scoreboard_param.mask = 0xFF;
6921     scoreboard_param.enable = generic_context->use_hw_scoreboard;
6922     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
6923     scoreboard_param.walkpat_flag = 0;
6924
6925     gpe_context = &kernel_context->gpe_contexts;
6926     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
6927     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
6928
6929     memset(&common_kernel, 0, sizeof(common_kernel));
6930
6931     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
6932                                          generic_context->enc_kernel_size,
6933                                          INTEL_GENERIC_ENC_SFD,
6934                                          0,
6935                                          &common_kernel);
6936
6937     gpe->load_kernels(ctx,
6938                       gpe_context,
6939                       &common_kernel,
6940                       1);
6941
6942 }
6943
6944 static void
6945 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
6946 {
6947
6948     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6949     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
6950     struct i965_gpe_table *gpe = &i965->gpe_table;
6951
6952     int i = 0;
6953
6954     gen9_avc_free_resources(vme_context);
6955
6956     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
6957         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
6958
6959     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
6960         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
6961
6962     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
6963         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
6964
6965     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
6966         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
6967
6968     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
6969
6970     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
6971
6972 }
6973
6974 /*
6975 vme pipeline
6976 */
6977 static void
6978 gen9_avc_update_parameters(VADriverContextP ctx,
6979                            VAProfile profile,
6980                            struct encode_state *encode_state,
6981                            struct intel_encoder_context *encoder_context)
6982 {
6983     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6984     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6985     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6986     VAEncSequenceParameterBufferH264 *seq_param;
6987     VAEncSliceParameterBufferH264 *slice_param;
6988     VAEncMiscParameterBuffer *fei_misc_param;
6989     int i, j, slice_index;
6990     unsigned int preset = generic_state->preset;
6991     unsigned int fei_enabled = encoder_context->fei_enabled;
6992
6993     /* seq/pic/slice parameter setting */
6994     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
6995     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
6996
6997     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
6998     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6999
7000     if (fei_enabled &&
7001         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7002         fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7003         avc_state->fei_framectl_param =
7004             (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7005     }
7006
7007     avc_state->slice_num = 0;
7008     slice_index = 0;
7009     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7010         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7011         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7012             avc_state->slice_param[slice_index] = slice_param;
7013             slice_param++;
7014             slice_index++;
7015             avc_state->slice_num++;
7016         }
7017     }
7018
7019     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7020     seq_param = avc_state->seq_param;
7021     slice_param = avc_state->slice_param[0];
7022
7023     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7024
7025     if (slice_param->slice_type == SLICE_TYPE_I ||
7026         slice_param->slice_type == SLICE_TYPE_SI)
7027         generic_state->frame_type = SLICE_TYPE_I;
7028     else if (slice_param->slice_type == SLICE_TYPE_P)
7029         generic_state->frame_type = SLICE_TYPE_P;
7030     else if (slice_param->slice_type == SLICE_TYPE_B)
7031         generic_state->frame_type = SLICE_TYPE_B;
7032     if (profile == VAProfileH264High)
7033         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7034     else
7035         avc_state->transform_8x8_mode_enable = 0;
7036
7037     /* rc init*/
7038     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7039         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7040         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7041         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7042         generic_state->frames_per_100s = 3000; /* 30fps */
7043     }
7044
7045     generic_state->gop_size = seq_param->intra_period;
7046     generic_state->gop_ref_distance = seq_param->ip_period;
7047
7048     if (generic_state->internal_rate_mode == VA_RC_CBR) {
7049         generic_state->max_bit_rate = generic_state->target_bit_rate;
7050         generic_state->min_bit_rate = generic_state->target_bit_rate;
7051     }
7052
7053     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7054         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7055     }
7056
7057     generic_state->preset = encoder_context->quality_level;
7058     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7059         generic_state->preset = INTEL_PRESET_RT_SPEED;
7060     }
7061     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7062
7063     if (!generic_state->brc_inited) {
7064         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7065         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7066         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7067         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7068     }
7069
7070
7071     generic_state->curr_pak_pass = 0;
7072     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7073
7074     if (generic_state->internal_rate_mode == VA_RC_CBR ||
7075         generic_state->internal_rate_mode == VA_RC_VBR)
7076         generic_state->brc_enabled = 1;
7077     else
7078         generic_state->brc_enabled = 0;
7079
7080     if (generic_state->brc_enabled &&
7081         (!generic_state->init_vbv_buffer_fullness_in_bit ||
7082          !generic_state->vbv_buffer_size_in_bit ||
7083          !generic_state->max_bit_rate ||
7084          !generic_state->target_bit_rate ||
7085          !generic_state->frames_per_100s)) {
7086         WARN_ONCE("Rate control parameter is required for BRC\n");
7087         generic_state->brc_enabled = 0;
7088     }
7089
7090     if (!generic_state->brc_enabled) {
7091         generic_state->target_bit_rate = 0;
7092         generic_state->max_bit_rate = 0;
7093         generic_state->min_bit_rate = 0;
7094         generic_state->init_vbv_buffer_fullness_in_bit = 0;
7095         generic_state->vbv_buffer_size_in_bit = 0;
7096         generic_state->num_pak_passes = 1;
7097     } else {
7098         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7099     }
7100
7101
7102     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7103     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7104     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7105     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7106
7107     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7108     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7109     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
7110     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7111
7112     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7113     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7114     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
7115     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7116
7117     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7118     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7119     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
7120     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7121
7122     if (generic_state->hme_supported) {
7123         generic_state->hme_enabled = 1;
7124     } else {
7125         generic_state->hme_enabled = 0;
7126     }
7127
7128     if (generic_state->b16xme_supported) {
7129         generic_state->b16xme_enabled = 1;
7130     } else {
7131         generic_state->b16xme_enabled = 0;
7132     }
7133
7134     if (generic_state->b32xme_supported) {
7135         generic_state->b32xme_enabled = 1;
7136     } else {
7137         generic_state->b32xme_enabled = 0;
7138     }
7139     /* disable HME/16xME if the size is too small */
7140     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7141         generic_state->b32xme_supported = 0;
7142         generic_state->b32xme_enabled = 0;
7143         generic_state->b16xme_supported = 0;
7144         generic_state->b16xme_enabled = 0;
7145         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7146         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7147     }
7148     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7149         generic_state->b32xme_supported = 0;
7150         generic_state->b32xme_enabled = 0;
7151         generic_state->b16xme_supported = 0;
7152         generic_state->b16xme_enabled = 0;
7153         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7154         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7155     }
7156
7157     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7158         generic_state->b32xme_supported = 0;
7159         generic_state->b32xme_enabled = 0;
7160         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7161         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7162     }
7163     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7164         generic_state->b32xme_supported = 0;
7165         generic_state->b32xme_enabled = 0;
7166         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7167         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7168     }
7169
7170     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7171         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7172         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7173     }
7174     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
7175         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
7176         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
7177     }
7178
7179 }
7180
/*
 * Validate and finalize the per-frame encoding decisions after
 * gen9_avc_update_parameters(): maps the VA rate-control mode to the
 * internal mode, resolves the BRC/CQP pass count, and derives the
 * per-frame kernel feature flags (SFD, CAF, flatness check, TQ,
 * rounding values, bi-prediction weights).  Order matters: several
 * flags set early are conditionally overridden further down.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    int i = 0;
    /* Effective QP of the first slice; used for the adaptive rounding tables. */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /*avbr init*/
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Map the VA rate-control mode to the internal mode; anything that is
     * not CBR/VBR falls back to CQP. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /*check brc parameter: per-MB QP input is mutually exclusive with BRC*/
    if (generic_state->brc_enabled) {
        avc_state->mb_qp_data_enable = 0;
    }

    /*set the brc init and reset accordingly*/
    if (generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
         rate_control_mode == VA_RC_CQP)) {
        generic_state->brc_need_reset = 0;// not support by CQP
    }

    /* Static frame detection only for non-BRC inter frames. */
    if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
        avc_state->sfd_enable = 0;
    } else {
        avc_state->sfd_enable = 1;
    }

    /* Clamp the BRC sliding-window size to min(fps, 60) frames. */
    if (generic_state->frames_per_window_size == 0) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    }

    if (generic_state->brc_enabled) {
        /* HME only helps inter frames. */
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        if (avc_state->min_max_qp_enable) {
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
        /* ROI requires MB-level BRC (see note below). */
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    } else {
        generic_state->num_pak_passes = 1;// CQP only one pass
    }

    /* I-frame distortion pass of MBENC feeds the BRC distortion buffer. */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /*CAD check: continuous/all fractional motion estimation per preset*/
    if (avc_state->caf_supported) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            avc_state->caf_enable = 0;
            break;
        case SLICE_TYPE_P:
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        /* Optionally drop CAF at >= 720p when the preset requests it. */
        if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
            if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if (avc_state->flatness_check_supported) {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    } else {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enbale*/
    if (avc_state->adaptive_transform_decision_enable) {
        avc_state->mb_status_enable = 1;
    } else {
        avc_state->mb_status_enable = 0;
    }

    /*slice check,all the slices use the same slice height except the last slice*/
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for (i = 0; i < avc_state->slice_num; i++) {
        if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
            avc_state->arbitrary_num_mbs_in_slice = 1;
            avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
        } else {
            avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        }
    }

    /* Overrides any earlier HME decision for I frames. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        generic_state->hme_enabled = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->b32xme_enabled = 0;
    }

    /* B frames: compute the temporal distance scale factors and the
     * resulting implicit bi-prediction weight. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
                                             && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality preset: enable trellis quantization (and MB BRC under BRC). */
    if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if (generic_state->brc_enabled) {
            generic_state->mb_brc_enabled = 1;
        }
    }

    //check the inter rounding
    /* NOTE(review): rounding_inter_* are hard-set to 255 here, then compared
     * against AVC_INVALID_ROUNDING_VALUE below; presumably that constant is
     * 255 so the preset/adaptive tables are always consulted — confirm
     * against i965_avc_encoder_common.h. */
    avc_state->rounding_value = 0;
    avc_state->rounding_inter_p = 255;//default
    avc_state->rounding_inter_b = 255; //default
    avc_state->rounding_inter_b_ref = 255; //default

    if (generic_state->frame_type == SLICE_TYPE_P) {
        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
            /* No explicit override: pick adaptive (QP-indexed) rounding for
             * non-BRC, otherwise the preset table. */
            if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
                if (generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            } else {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        } else {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        /* B frames distinguish reference vs non-reference pictures. */
        if (pic_param->pic_fields.bits.reference_pic_flag) {
            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        } else {
            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
                if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            } else {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }
    return VA_STATUS_SUCCESS;
}
7369
7370 static VAStatus
7371 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
7372                                 struct encode_state *encode_state,
7373                                 struct intel_encoder_context *encoder_context)
7374 {
7375     VAStatus va_status;
7376     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7377     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
7378     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7379     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7380     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7381
7382     struct object_surface *obj_surface;
7383     struct object_buffer *obj_buffer;
7384     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7385     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
7386     struct i965_coded_buffer_segment *coded_buffer_segment;
7387
7388     struct gen9_surface_avc *avc_priv_surface;
7389     dri_bo *bo;
7390     struct avc_surface_param surface_param;
7391     int i, j = 0;
7392     unsigned char * pdata;
7393
7394     /* Setup current reconstruct frame */
7395     obj_surface = encode_state->reconstructed_object;
7396     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7397
7398     if (va_status != VA_STATUS_SUCCESS)
7399         return va_status;
7400
7401     memset(&surface_param, 0, sizeof(surface_param));
7402     surface_param.frame_width = generic_state->frame_width_in_pixel;
7403     surface_param.frame_height = generic_state->frame_height_in_pixel;
7404     va_status = gen9_avc_init_check_surfaces(ctx,
7405                                              obj_surface,
7406                                              encoder_context,
7407                                              &surface_param);
7408     if (va_status != VA_STATUS_SUCCESS)
7409         return va_status;
7410     {
7411         /* init the member of avc_priv_surface,frame_store_id,qp_value*/
7412         avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7413         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
7414         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
7415         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
7416         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
7417         i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
7418         i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
7419         avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
7420         avc_priv_surface->frame_store_id = 0;
7421         avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
7422         avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
7423         avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
7424         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
7425         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
7426     }
7427     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7428     i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7429
7430     /* input YUV surface*/
7431     obj_surface = encode_state->input_yuv_object;
7432     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7433
7434     if (va_status != VA_STATUS_SUCCESS)
7435         return va_status;
7436     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7437     i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7438
7439     /* Reference surfaces */
7440     for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
7441         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7442         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
7443         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
7444         obj_surface = encode_state->reference_objects[i];
7445         avc_state->top_field_poc[2 * i] = 0;
7446         avc_state->top_field_poc[2 * i + 1] = 0;
7447
7448         if (obj_surface && obj_surface->bo) {
7449             i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
7450
7451             /* actually it should be handled when it is reconstructed surface*/
7452             va_status = gen9_avc_init_check_surfaces(ctx,
7453                                                      obj_surface, encoder_context,
7454                                                      &surface_param);
7455             if (va_status != VA_STATUS_SUCCESS)
7456                 return va_status;
7457             avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7458             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
7459             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
7460             avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
7461             avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
7462             avc_priv_surface->frame_store_id = i;
7463         } else {
7464             break;
7465         }
7466     }
7467
7468     /* Encoded bitstream ?*/
7469     obj_buffer = encode_state->coded_buf_object;
7470     bo = obj_buffer->buffer_store->bo;
7471     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7472     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
7473     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
7474     generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
7475
7476     /*status buffer */
7477     avc_ctx->status_buffer.bo = bo;
7478
7479     /* set the internal flag to 0 to indicate the coded size is unknown */
7480     dri_bo_map(bo, 1);
7481     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
7482     coded_buffer_segment->mapped = 0;
7483     coded_buffer_segment->codec = encoder_context->codec;
7484     coded_buffer_segment->status_support = 1;
7485
7486     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
7487     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
7488     dri_bo_unmap(bo);
7489
7490     //frame id, it is the ref pic id in the reference_objects list.
7491     avc_state->num_refs[0] = 0;
7492     avc_state->num_refs[1] = 0;
7493     if (generic_state->frame_type == SLICE_TYPE_P) {
7494         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
7495
7496         if (slice_param->num_ref_idx_active_override_flag)
7497             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
7498     } else if (generic_state->frame_type == SLICE_TYPE_B) {
7499         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
7500         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
7501
7502         if (slice_param->num_ref_idx_active_override_flag) {
7503             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
7504             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
7505         }
7506     }
7507
7508     if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
7509         return VA_STATUS_ERROR_INVALID_VALUE;
7510     if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
7511         return VA_STATUS_ERROR_INVALID_VALUE;
7512
7513     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
7514         VAPictureH264 *va_pic;
7515
7516         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
7517         avc_state->list_ref_idx[0][i] = 0;
7518
7519         if (i >= avc_state->num_refs[0])
7520             continue;
7521
7522         va_pic = &slice_param->RefPicList0[i];
7523
7524         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
7525             obj_surface = encode_state->reference_objects[j];
7526
7527             if (obj_surface &&
7528                 obj_surface->bo &&
7529                 obj_surface->base.id == va_pic->picture_id) {
7530
7531                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
7532                 avc_state->list_ref_idx[0][i] = j;
7533
7534                 break;
7535             }
7536         }
7537     }
7538     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
7539         VAPictureH264 *va_pic;
7540
7541         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
7542         avc_state->list_ref_idx[1][i] = 0;
7543
7544         if (i >= avc_state->num_refs[1])
7545             continue;
7546
7547         va_pic = &slice_param->RefPicList1[i];
7548
7549         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
7550             obj_surface = encode_state->reference_objects[j];
7551
7552             if (obj_surface &&
7553                 obj_surface->bo &&
7554                 obj_surface->base.id == va_pic->picture_id) {
7555
7556                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
7557                 avc_state->list_ref_idx[1][i] = j;
7558
7559                 break;
7560             }
7561         }
7562     }
7563
7564     return VA_STATUS_SUCCESS;
7565 }
7566
/* Per-frame GPE kernel init hook for the AVC VME stage.
 *
 * Currently a no-op placeholder kept for pipeline symmetry
 * (prepare/init/run/final); the real per-frame surface and resource
 * setup appears to happen in gen9_avc_vme_gpe_kernel_prepare().
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
7574
7575 static VAStatus
7576 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
7577                               struct encode_state *encode_state,
7578                               struct intel_encoder_context *encoder_context)
7579 {
7580
7581     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7582     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7583     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7584
7585     /*set this flag when all kernel is finished*/
7586     if (generic_state->brc_enabled) {
7587         generic_state->brc_inited = 1;
7588         generic_state->brc_need_reset = 0;
7589         avc_state->mbenc_curbe_set_in_brc_update = 0;
7590     }
7591     return VA_STATUS_SUCCESS;
7592 }
7593
/* Submit the full sequence of VME GPE kernels for the current frame.
 *
 * The ordering below is deliberate and must not be changed:
 *   1. BRC init/reset (non-FEI only) — resets the BRC distortion surface,
 *      so it has to run before HME.
 *   2. Down-scaling (4x, then 16x, then 32x, each gated on support).
 *   3. HME, coarsest level first (32x -> 16x -> 4x).
 *   4. SFD (static frame detection), in the same command buffer as HME.
 *   5. BRC frame/MB update (with an optional I-frame-distortion MbEnc pass).
 *   6. Weighted-prediction kernels, when supported and requested.
 *   7. The main MbEnc kernel.
 *
 * Returns VA_STATUS_SUCCESS unconditionally; the individual kernel
 * launch helpers do not propagate errors through this path.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    int fei_enabled = encoder_context->fei_enabled;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling*/
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel: run coarsest level first so finer levels can use its output*/
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* Optional MbEnc pass in I-frame-distortion mode feeds the BRC update. */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction,disable by now */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            /* Neither luma nor chroma L0 weights: silently drop the flag. */
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
7685
7686 static VAStatus
7687 gen9_avc_vme_pipeline(VADriverContextP ctx,
7688                       VAProfile profile,
7689                       struct encode_state *encode_state,
7690                       struct intel_encoder_context *encoder_context)
7691 {
7692     VAStatus va_status;
7693
7694     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
7695
7696     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
7697     if (va_status != VA_STATUS_SUCCESS)
7698         return va_status;
7699
7700     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
7701     if (va_status != VA_STATUS_SUCCESS)
7702         return va_status;
7703
7704     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
7705     if (va_status != VA_STATUS_SUCCESS)
7706         return va_status;
7707
7708     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
7709     if (va_status != VA_STATUS_SUCCESS)
7710         return va_status;
7711
7712     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
7713     if (va_status != VA_STATUS_SUCCESS)
7714         return va_status;
7715
7716     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
7717
7718     return VA_STATUS_SUCCESS;
7719 }
7720
7721 static void
7722 gen9_avc_vme_context_destroy(void * context)
7723 {
7724     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
7725     struct generic_encoder_context *generic_ctx;
7726     struct i965_avc_encoder_context *avc_ctx;
7727     struct generic_enc_codec_state *generic_state;
7728     struct avc_enc_state *avc_state;
7729
7730     if (!vme_context)
7731         return;
7732
7733     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
7734     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7735     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7736     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7737
7738     gen9_avc_kernel_destroy(vme_context);
7739
7740     free(generic_ctx);
7741     free(avc_ctx);
7742     free(generic_state);
7743     free(avc_state);
7744     free(vme_context);
7745     return;
7746
7747 }
7748
7749 static void
7750 gen8_avc_kernel_init(VADriverContextP ctx,
7751                      struct intel_encoder_context *encoder_context)
7752 {
7753     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7754     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7755     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
7756     int fei_enabled = encoder_context->fei_enabled;
7757
7758     generic_ctx->get_kernel_header_and_size = fei_enabled ?
7759                                               intel_avc_fei_get_kernel_header_and_size :
7760                                               intel_avc_get_kernel_header_and_size ;
7761     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
7762     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
7763     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
7764     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
7765     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
7766
7767     //function pointer
7768     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
7769     generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
7770     generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
7771     generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
7772     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
7773     generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
7774     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
7775
7776     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
7777     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
7778     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
7779     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
7780     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
7781     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
7782 }
7783 static void
7784 gen9_avc_kernel_init(VADriverContextP ctx,
7785                      struct intel_encoder_context *encoder_context)
7786 {
7787     struct i965_driver_data *i965 = i965_driver_data(ctx);
7788     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7789     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7790     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
7791     int fei_enabled = encoder_context->fei_enabled;
7792
7793     generic_ctx->get_kernel_header_and_size = fei_enabled ?
7794                                               intel_avc_fei_get_kernel_header_and_size :
7795                                               intel_avc_get_kernel_header_and_size ;
7796
7797     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
7798                                encoder_context->fei_enabled);
7799
7800     if (!fei_enabled) {
7801         gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
7802         gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
7803         gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
7804         gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
7805         gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
7806
7807         //function pointer
7808         generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
7809         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
7810         generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
7811         generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
7812         generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
7813         generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
7814         generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
7815         generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
7816         generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
7817
7818         generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
7819         generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
7820         generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
7821         generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
7822         generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
7823         generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
7824         generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
7825         generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
7826
7827         if (IS_SKL(i965->intel.device_info) ||
7828             IS_BXT(i965->intel.device_info))
7829             generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
7830         else if (IS_KBL(i965->intel.device_info) ||
7831                  IS_GLK(i965->intel.device_info))
7832             generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
7833     } else {
7834         generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
7835         generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
7836     }
7837 }
7838
7839 /*
7840 PAK pipeline related function
7841 */
7842 extern int
7843 intel_avc_enc_slice_type_fixup(int slice_type);
7844
7845 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
7846 static VAStatus
7847 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
7848                                 struct encode_state *encode_state,
7849                                 struct intel_encoder_context *encoder_context)
7850 {
7851     struct i965_driver_data *i965 = i965_driver_data(ctx);
7852     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7853     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7854     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7855     unsigned int size  = 0;
7856     int allocate_flag = 1;
7857
7858     /*second level batch buffer for image state write when cqp etc*/
7859     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
7860     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
7861     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7862                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
7863                                                ALIGN(size, 0x1000),
7864                                                "second levle batch (image state write) buffer");
7865     if (!allocate_flag)
7866         goto failed_allocation;
7867
7868     if (!generic_state->brc_allocated) {
7869         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
7870         size = 64;//44
7871         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7872                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
7873                                                    ALIGN(size, 0x1000),
7874                                                    "brc pak statistic buffer");
7875         if (!allocate_flag)
7876             goto failed_allocation;
7877     }
7878
7879     return VA_STATUS_SUCCESS;
7880
7881 failed_allocation:
7882     return VA_STATUS_ERROR_ALLOCATION_FAILED;
7883 }
7884
/* Emit MFX_PIPE_MODE_SELECT for the AVC PAK (encode) pass.
 *
 * Programs the MFX pipe for AVC long-format encoding. Stream-out is
 * enabled on every PAK pass except the last one
 * (curr_pak_pass != num_pak_passes - 1), feeding multi-pass BRC.
 * Exactly one of pre/post deblocking output is expected to carry a BO;
 * the corresponding enable bit is derived from the BO pointer.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
7925
/* Emit MFX_SURFACE_STATE for one NV12 surface used by the PAK pass.
 *
 * gpe_resource supplies width/height/pitch and the Y->CbCr plane offset
 * of a tiled (Y-major) planar 4:2:0 surface with interleaved U/V; 'id'
 * selects which MFX surface slot is being described (e.g. source vs.
 * reconstructed — determined by the caller).
 * Note: DW4 and DW5 both carry y_cb_offset; with interleaved U/V there
 * is no separate Cr plane, so the second offset mirrors the first.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
}
7957
/* Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs) for the AVC PAK pass.
 *
 * Programs every buffer address the MFX engine needs: pre/post
 * deblocking outputs, the uncompressed input picture, the PAK MB status
 * buffer (bound for both write, DW10-12, and read, DW52-54), the intra
 * and deblocking row-store scratch buffers, and the 16 reference
 * picture slots (DW19-50). The ILDB and memory-compression fields are
 * left NULL/0 — unused by this encoder path. The DW layout must match
 * the hardware command exactly; do not reorder.
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
8015
/*
 * Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): programs the indirect-object
 * base addresses used by the PAK -- the per-MB MV data surface attached to
 * the reconstructed picture (produced by the VME stage) and the compressed
 * bitstream output buffer (PAK-BSE).  The bitstream-in, IT-COEFF and DBLK
 * indirect objects are decode-side and are programmed as NULL here.
 * Silently returns if the reconstructed surface or its private AVC data is
 * missing (nothing sensible can be programmed then).
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bitstream offset; not used on the
     * encoding path, so NULL */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address; the upper bound
     * is the MV record size (32 * 4 bytes per MB) rounded up to a 4KB page */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
8080
/*
 * Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs).  Only the BSD/MPC row-store
 * scratch buffer is required for encoding; the MPR row-store and bitplane
 * read buffers are decode-only, so they are programmed as NULL.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer (write target) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
8104
/*
 * Emits MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-mode MV (DMV) buffers
 * of the reference frames, the DMV write buffer of the current frame, and
 * the POC list the hardware uses for B-slice direct-mode scaling.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference: one 64-bit address per
     * reference slot.  NOTE(review): the loop steps by 2, so only the
     * even-indexed DMV buffers are programmed -- presumably one buffer per
     * reference frame (top/bottom fields share one) -- confirm against the
     * buffer allocation code. */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* empty slot: two zero DWs keep the command layout intact */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory attributes (MOCS) for the reference DMV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by the PAK) */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list (DW37-70): 32 reference entries followed by the two entries
     * of the current picture.  NOTE(review): assumes top_field_poc[] stores
     * top/bottom POC pairs with the current picture in the last two slots
     * (indices NUM_MFC_AVC_DMV_BUFFERS-2/-1) -- confirm with the VME setup. */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
8152
8153 static void
8154 gen9_mfc_qm_state(VADriverContextP ctx,
8155                   int qm_type,
8156                   const unsigned int *qm,
8157                   int qm_length,
8158                   struct intel_encoder_context *encoder_context)
8159 {
8160     struct intel_batchbuffer *batch = encoder_context->base.batch;
8161     unsigned int qm_buffer[16];
8162
8163     assert(qm_length <= 16);
8164     assert(sizeof(*qm) == 4);
8165     memset(qm_buffer, 0, 16 * 4);
8166     memcpy(qm_buffer, qm, qm_length * 4);
8167
8168     BEGIN_BCS_BATCH(batch, 18);
8169     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
8170     OUT_BCS_BATCH(batch, qm_type << 0);
8171     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
8172     ADVANCE_BCS_BATCH(batch);
8173 }
8174
8175 static void
8176 gen9_mfc_avc_qm_state(VADriverContextP ctx,
8177                       struct encode_state *encode_state,
8178                       struct intel_encoder_context *encoder_context)
8179 {
8180     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8181     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
8182     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
8183     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
8184
8185
8186     const unsigned int *qm_4x4_intra;
8187     const unsigned int *qm_4x4_inter;
8188     const unsigned int *qm_8x8_intra;
8189     const unsigned int *qm_8x8_inter;
8190
8191     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
8192         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
8193         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
8194     } else {
8195         VAIQMatrixBufferH264 *qm;
8196         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
8197         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
8198         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
8199         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
8200         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
8201         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
8202     }
8203
8204     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
8205     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
8206     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
8207     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
8208 }
8209
8210 static void
8211 gen9_mfc_fqm_state(VADriverContextP ctx,
8212                    int fqm_type,
8213                    const unsigned int *fqm,
8214                    int fqm_length,
8215                    struct intel_encoder_context *encoder_context)
8216 {
8217     struct intel_batchbuffer *batch = encoder_context->base.batch;
8218     unsigned int fqm_buffer[32];
8219
8220     assert(fqm_length <= 32);
8221     assert(sizeof(*fqm) == 4);
8222     memset(fqm_buffer, 0, 32 * 4);
8223     memcpy(fqm_buffer, fqm, fqm_length * 4);
8224
8225     BEGIN_BCS_BATCH(batch, 34);
8226     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
8227     OUT_BCS_BATCH(batch, fqm_type << 0);
8228     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
8229     ADVANCE_BCS_BATCH(batch);
8230 }
8231
/*
 * Derives a len x len forward-quantizer matrix from a scaling list:
 * fqm[row][col] = 2^16 / qm[col][row] (integer division, transposed),
 * stored in raster order as uint16.  Entries of qm must be non-zero.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q);  /* a zero entry would divide by zero */
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
8242
/*
 * Programs the four AVC forward-quantizer matrices (MFX_FQM_STATE).
 * Without scaling lists the flat FQM is used.  Otherwise each entry is
 * 2^16 / scaling-list entry (see gen9_mfc_fill_fqm), packed as uint16:
 * the three 4x4 lists of one set fill 24 DWs, a single 8x8 list fills
 * 32 DWs.  The local fqm[] buffer is re-staged before every command.
 */
static void
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        uint32_t fqm[32];
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* 4x4 intra: lists 0-2 (Y/Cb/Cr), 16 uint16 each -> 24 DWs total */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* 4x4 inter: lists 3-5 (Y/Cb/Cr) */
        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        /* 8x8 intra (list 0): 64 uint16 -> 32 DWs */
        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        /* 8x8 inter (list 1) */
        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
8281
8282 static void
8283 gen9_mfc_avc_insert_object(VADriverContextP ctx,
8284                            struct intel_encoder_context *encoder_context,
8285                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
8286                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
8287                            int slice_header_indicator,
8288                            struct intel_batchbuffer *batch)
8289 {
8290     if (data_bits_in_last_dw == 0)
8291         data_bits_in_last_dw = 32;
8292
8293     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
8294
8295     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
8296     OUT_BCS_BATCH(batch,
8297                   (0 << 16) |   /* always start at offset 0 */
8298                   (slice_header_indicator << 14) |
8299                   (data_bits_in_last_dw << 8) |
8300                   (skip_emul_byte_count << 4) |
8301                   (!!emulation_flag << 3) |
8302                   ((!!is_last_header) << 2) |
8303                   ((!!is_end_of_slice) << 1) |
8304                   (0 << 0));    /* check this flag */
8305     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
8306
8307     ADVANCE_BCS_BATCH(batch);
8308 }
8309
/*
 * Scans the raw packed-data buffers attached to the first slice for an
 * Access Unit Delimiter NAL (nal_unit_type == AVC_NAL_DELIMITER) and, if
 * found, inserts it into the bitstream before any other header.  At most
 * one AUD is inserted (the loop breaks after the first match); non-AUD
 * raw buffers are left for gen9_mfc_avc_insert_slice_packed_data().
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    /* raw packed-data buffers attached to slice 0 */
    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
        if (param->type != VAEncPackedHeaderRawData)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* the byte just before the payload is the NAL header; the low five
         * bits are nal_unit_type */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
8355
/*
 * Inserts the packed header data associated with one slice:
 *   1) every raw packed buffer for the slice, except slice headers and AUDs
 *      (the AUD was already inserted first by
 *      gen9_mfc_avc_insert_aud_packed_data());
 *   2) the slice header itself, which must come last -- either the
 *      application-supplied packed slice header, or, when none was
 *      provided, one built by the driver via build_avc_slice_header().
 * The slice header insertion carries last-header = 1 and
 * slice_header_indicator = 1.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* 0 means "no packed slice header supplied"; -1 marks that case */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * skip AUDs (low 5 bits of the NAL header byte) -- already emitted */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header() allocated the buffer; we own it here */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
8462
8463 static void
8464 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
8465                            struct encode_state *encode_state,
8466                            struct intel_encoder_context *encoder_context,
8467                            VAEncSliceParameterBufferH264 *slice_param,
8468                            int slice_index,
8469                            struct intel_batchbuffer *batch)
8470 {
8471     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8472     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
8473     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
8474     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
8475     unsigned int skip_emul_byte_cnt;
8476
8477     if (slice_index == 0) {
8478
8479         /* if AUD exist and insert it firstly */
8480         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
8481
8482         if (encode_state->packed_header_data[idx]) {
8483             VAEncPackedHeaderParameterBuffer *param = NULL;
8484             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
8485             unsigned int length_in_bits;
8486
8487             assert(encode_state->packed_header_param[idx]);
8488             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
8489             length_in_bits = param->bit_length;
8490
8491             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
8492             gen9_mfc_avc_insert_object(ctx,
8493                                        encoder_context,
8494                                        header_data,
8495                                        ALIGN(length_in_bits, 32) >> 5,
8496                                        length_in_bits & 0x1f,
8497                                        skip_emul_byte_cnt,
8498                                        0,
8499                                        0,
8500                                        !param->has_emulation_bytes,
8501                                        0,
8502                                        batch);
8503         }
8504
8505         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
8506
8507         if (encode_state->packed_header_data[idx]) {
8508             VAEncPackedHeaderParameterBuffer *param = NULL;
8509             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
8510             unsigned int length_in_bits;
8511
8512             assert(encode_state->packed_header_param[idx]);
8513             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
8514             length_in_bits = param->bit_length;
8515
8516             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
8517
8518             gen9_mfc_avc_insert_object(ctx,
8519                                        encoder_context,
8520                                        header_data,
8521                                        ALIGN(length_in_bits, 32) >> 5,
8522                                        length_in_bits & 0x1f,
8523                                        skip_emul_byte_cnt,
8524                                        0,
8525                                        0,
8526                                        !param->has_emulation_bytes,
8527                                        0,
8528                                        batch);
8529         }
8530
8531         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
8532
8533         if (encode_state->packed_header_data[idx]) {
8534             VAEncPackedHeaderParameterBuffer *param = NULL;
8535             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
8536             unsigned int length_in_bits;
8537
8538             assert(encode_state->packed_header_param[idx]);
8539             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
8540             length_in_bits = param->bit_length;
8541
8542             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
8543             gen9_mfc_avc_insert_object(ctx,
8544                                        encoder_context,
8545                                        header_data,
8546                                        ALIGN(length_in_bits, 32) >> 5,
8547                                        length_in_bits & 0x1f,
8548                                        skip_emul_byte_cnt,
8549                                        0,
8550                                        0,
8551                                        !param->has_emulation_bytes,
8552                                        0,
8553                                        batch);
8554         } else if (internal_rate_mode == VA_RC_CBR) {
8555             /* insert others */
8556         }
8557     }
8558
8559     gen9_mfc_avc_insert_slice_packed_data(ctx,
8560                                           encode_state,
8561                                           encoder_context,
8562                                           slice_index,
8563                                           batch);
8564 }
8565
/*
 * Emits MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, weighted-prediction setup, QP and deblocking controls,
 * slice start/end MB positions, multi-pass BRC controls and the inter
 * rounding configuration.  next_slice_param == NULL marks the last slice
 * of the picture.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* first-MB address -> (x, y) in MB units */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" position is the end of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    /* per-slice-type derivation of weighting and reference counts; slice
     * overrides (num_ref_idx_active_override_flag) win over the PPS values */
    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* BRC grow/shrink/QP-clamp fields are zeroed here; the BRC kernels
     * drive the actual rate control */
    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* counters only carry over on PAK re-passes; panic only on the final
     * pass of a multi-pass BRC encode (and never for CQP) */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: first MB of this slice / first MB of the next slice */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of this slice's output in the PAK-BSE buffer */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /* QP lower clamp -- 0 here; NOTE(review): clamping presumably handled by BRC kernels, confirm */
                  (max_qp_p << 16) |     /* QP upper clamp -- 0 here */
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |           /* NOTE(review): presumed intra rounding enable -- confirm with PRM */
                  (5 << 24) |           /* NOTE(review): presumed intra rounding value -- confirm with PRM */
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
8714
8715 static uint8_t
8716 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
8717 {
8718     unsigned int is_long_term =
8719         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
8720     unsigned int is_top_field =
8721         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
8722     unsigned int is_bottom_field =
8723         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
8724
8725     return ((is_long_term                         << 6) |
8726             (0 << 5) |
8727             (frame_store_id                       << 1) |
8728             ((is_top_field ^ 1) & is_bottom_field));
8729 }
8730
8731 static void
8732 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
8733                            struct encode_state *encode_state,
8734                            struct intel_encoder_context *encoder_context,
8735                            VAEncSliceParameterBufferH264 *slice_param,
8736                            struct intel_batchbuffer *batch)
8737 {
8738     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8739     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
8740     VAPictureH264 *ref_pic;
8741     int i, slice_type, ref_idx_shift;
8742     unsigned int fwd_ref_entry;
8743     unsigned int bwd_ref_entry;
8744
8745     /* max 4 ref frames are allowed for l0 and l1 */
8746     fwd_ref_entry = 0x80808080;
8747     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
8748
8749     if ((slice_type == SLICE_TYPE_P) ||
8750         (slice_type == SLICE_TYPE_B)) {
8751         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
8752             ref_pic = &slice_param->RefPicList0[i];
8753             ref_idx_shift = i * 8;
8754
8755             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
8756             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
8757         }
8758     }
8759
8760     bwd_ref_entry = 0x80808080;
8761     if (slice_type == SLICE_TYPE_B) {
8762         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
8763             ref_pic = &slice_param->RefPicList1[i];
8764             ref_idx_shift = i * 8;
8765
8766             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
8767             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
8768         }
8769     }
8770
8771     if ((slice_type == SLICE_TYPE_P) ||
8772         (slice_type == SLICE_TYPE_B)) {
8773         BEGIN_BCS_BATCH(batch, 10);
8774         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
8775         OUT_BCS_BATCH(batch, 0);                        // L0
8776         OUT_BCS_BATCH(batch, fwd_ref_entry);
8777
8778         for (i = 0; i < 7; i++) {
8779             OUT_BCS_BATCH(batch, 0x80808080);
8780         }
8781
8782         ADVANCE_BCS_BATCH(batch);
8783     }
8784
8785     if (slice_type == SLICE_TYPE_B) {
8786         BEGIN_BCS_BATCH(batch, 10);
8787         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
8788         OUT_BCS_BATCH(batch, 1);                  //Select L1
8789         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
8790         for (i = 0; i < 7; i++) {
8791             OUT_BCS_BATCH(batch, 0x80808080);
8792         }
8793         ADVANCE_BCS_BATCH(batch);
8794     }
8795 }
8796
8797 static void
8798 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
8799                                 struct encode_state *encode_state,
8800                                 struct intel_encoder_context *encoder_context,
8801                                 VAEncPictureParameterBufferH264 *pic_param,
8802                                 VAEncSliceParameterBufferH264 *slice_param,
8803                                 struct intel_batchbuffer *batch)
8804 {
8805     int i, slice_type;
8806     short weightoffsets[32 * 6];
8807
8808     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
8809
8810     if (slice_type == SLICE_TYPE_P &&
8811         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
8812         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
8813         for (i = 0; i < 32; i++) {
8814             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
8815             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
8816             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
8817             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
8818             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
8819             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
8820         }
8821
8822         BEGIN_BCS_BATCH(batch, 98);
8823         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
8824         OUT_BCS_BATCH(batch, 0);
8825         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
8826
8827         ADVANCE_BCS_BATCH(batch);
8828     }
8829
8830     if (slice_type == SLICE_TYPE_B &&
8831         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
8832         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
8833         for (i = 0; i < 32; i++) {
8834             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
8835             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
8836             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
8837             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
8838             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
8839             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
8840         }
8841
8842         BEGIN_BCS_BATCH(batch, 98);
8843         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
8844         OUT_BCS_BATCH(batch, 0);
8845         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
8846         ADVANCE_BCS_BATCH(batch);
8847
8848         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
8849         for (i = 0; i < 32; i++) {
8850             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
8851             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
8852             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
8853             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
8854             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
8855             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
8856         }
8857
8858         BEGIN_BCS_BATCH(batch, 98);
8859         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
8860         OUT_BCS_BATCH(batch, 1);
8861         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
8862         ADVANCE_BCS_BATCH(batch);
8863     }
8864 }
8865
/* Program the PAK commands for one slice.
 *
 * On the first PAK pass the per-slice commands (ref idx, weight/offset,
 * slice state, packed headers) are recorded into a reusable second-level
 * batch buffer and the record's start offset is remembered; subsequent
 * BRC re-encode passes replay the recorded commands from that offset
 * instead of rebuilding them. The slice's MB code (produced by the VME
 * stage or supplied externally in FEI mode) is then chained in as a
 * second second-level batch.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands at the current end of
         * the second-level batch and remember where they start. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded slice section so the chained replay
         * stops here (padding NOOP + MI_BATCH_BUFFER_END). */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-encode pass: replay the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* 16 dwords (64 bytes) of MB code per macroblock; start at this
     * slice's first MB. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
8940
8941 static void
8942 gen9_avc_pak_slice_level(VADriverContextP ctx,
8943                          struct encode_state *encode_state,
8944                          struct intel_encoder_context *encoder_context)
8945 {
8946     struct i965_driver_data *i965 = i965_driver_data(ctx);
8947     struct i965_gpe_table *gpe = &i965->gpe_table;
8948     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8949     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
8950     struct intel_batchbuffer *batch = encoder_context->base.batch;
8951     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
8952     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
8953     int i, j;
8954     int slice_index = 0;
8955     int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
8956     int has_tail = 0;             /* check it later */
8957
8958     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
8959         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
8960
8961         if (j == encode_state->num_slice_params_ext - 1)
8962             next_slice_group_param = NULL;
8963         else
8964             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
8965
8966         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
8967             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
8968                 next_slice_param = slice_param + 1;
8969             else
8970                 next_slice_param = next_slice_group_param;
8971
8972             gen9_mfc_avc_single_slice(ctx,
8973                                       encode_state,
8974                                       encoder_context,
8975                                       slice_param,
8976                                       next_slice_param,
8977                                       slice_index);
8978             slice_param++;
8979             slice_index++;
8980
8981             if (is_frame_level)
8982                 break;
8983         }
8984
8985         if (is_frame_level)
8986             break;
8987     }
8988
8989     if (has_tail) {
8990         /* insert a tail if required */
8991     }
8992
8993     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
8994     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
8995     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
8996 }
/* Emit the picture-level PAK command sequence for the current pass:
 * pipe mode select, surface/buffer address states, the AVC image state
 * (from the BRC-updated buffer or freshly generated for non-BRC),
 * quantizer matrices and direct-mode state.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* BRC re-encode pass: conditionally end the batch based on the
         * image status mask stored in the status buffer, so this pass is
         * skipped when the previous pass already met the BRC target
         * (NOTE(review): exact compare semantics depend on the
         * MI_CONDITIONAL_BATCH_BUFFER_END implementation — confirm). */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Fixed MFX command ordering: mode select first, then the surface
     * states for reconstructed (id 0) and raw input (id 4) surfaces,
     * then the address states. */
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* BRC: replay the image state prepared by the BRC kernel; each
         * pass has its own slot of INTEL_AVC_IMAGE_STATE_CMD_SIZE bytes. */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
9057
/* Append commands that snapshot the MFX status registers after PAK:
 * the bitstream byte count and image status mask are stored into the
 * status buffer, and the byte counts, pass number and image status
 * control are written into the BRC pre-PAK statistics buffer that the
 * BRC kernel reads for the next pass. Flushes before and after so the
 * register reads observe the completed PAK work.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* image status mask: read back by the conditional batch buffer end
     * of the next BRC pass */
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* dword 0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 1: bitstream byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 2: number of completed PAK passes (current pass + 1) */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* dwords 4..: per-pass image status control */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
9117
9118 static void
9119 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
9120                          struct intel_encoder_context *encoder_context)
9121 {
9122     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9123     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9124     unsigned int rate_control_mode = encoder_context->rate_control_mode;
9125
9126     switch (rate_control_mode & 0x7f) {
9127     case VA_RC_CBR:
9128         generic_state->internal_rate_mode = VA_RC_CBR;
9129         break;
9130
9131     case VA_RC_VBR:
9132         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
9133         break;
9134
9135     case VA_RC_CQP:
9136     default:
9137         generic_state->internal_rate_mode = VA_RC_CQP;
9138         break;
9139     }
9140
9141     if (encoder_context->quality_level == 0)
9142         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
9143 }
9144
9145 /* allcate resources for pak only (fei mode) */
9146 static VAStatus
9147 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
9148                                   VAProfile profile,
9149                                   struct encode_state *encode_state,
9150                                   struct intel_encoder_context *encoder_context)
9151 {
9152     VAStatus va_status;
9153     struct i965_driver_data *i965 = i965_driver_data(ctx);
9154     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9155     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9156     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9157     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9158     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9159     struct gen9_surface_avc *avc_priv_surface;
9160     VAEncPictureParameterBufferH264  *pic_param;
9161     VAEncSliceParameterBufferH264 *slice_param;
9162     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
9163     unsigned int size = 0, i, j;
9164     unsigned int frame_mb_nums;
9165     struct object_buffer *obj_buffer = NULL;
9166     struct buffer_store *buffer_store = NULL;
9167     struct object_surface *obj_surface = NULL;
9168     struct avc_surface_param surface_param;
9169     struct i965_coded_buffer_segment *coded_buffer_segment;
9170     dri_bo *bo;
9171     unsigned char * pdata;
9172
9173     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
9174
9175     pic_param = avc_state->pic_param;
9176     slice_param = avc_state->slice_param[0];
9177
9178     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
9179     if (va_status != VA_STATUS_SUCCESS)
9180         return va_status;
9181
9182     va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
9183     if (va_status != VA_STATUS_SUCCESS)
9184         return va_status;
9185
9186     /* Encoded bitstream ?*/
9187     obj_buffer = encode_state->coded_buf_object;
9188     bo = obj_buffer->buffer_store->bo;
9189     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
9190     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
9191     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
9192     generic_ctx->compressed_bitstream.end_offset =
9193         ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
9194
9195     /*status buffer */
9196     dri_bo_unreference(avc_ctx->status_buffer.bo);
9197     avc_ctx->status_buffer.bo = bo;
9198     dri_bo_reference(bo);
9199
9200     /* set the internal flag to 0 to indicate the coded size is unknown */
9201     dri_bo_map(bo, 1);
9202     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
9203     coded_buffer_segment->mapped = 0;
9204     coded_buffer_segment->codec = encoder_context->codec;
9205     coded_buffer_segment->status_support = 1;
9206
9207     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
9208     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
9209     dri_bo_unmap(bo);
9210     //frame id, it is the ref pic id in the reference_objects list.
9211     avc_state->num_refs[0] = 0;
9212     avc_state->num_refs[1] = 0;
9213     if (generic_state->frame_type == SLICE_TYPE_P) {
9214         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
9215
9216         if (slice_param->num_ref_idx_active_override_flag)
9217             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
9218     } else if (generic_state->frame_type == SLICE_TYPE_B) {
9219         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
9220         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
9221
9222         if (slice_param->num_ref_idx_active_override_flag) {
9223             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
9224             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
9225         }
9226     }
9227     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
9228         VAPictureH264 *va_pic;
9229
9230         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
9231         avc_state->list_ref_idx[0][i] = 0;
9232
9233         if (i >= avc_state->num_refs[0])
9234             continue;
9235
9236         va_pic = &slice_param->RefPicList0[i];
9237
9238         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
9239             obj_surface = encode_state->reference_objects[j];
9240
9241             if (obj_surface &&
9242                 obj_surface->bo &&
9243                 obj_surface->base.id == va_pic->picture_id) {
9244
9245                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
9246                 avc_state->list_ref_idx[0][i] = j;
9247
9248                 break;
9249             }
9250         }
9251     }
9252     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
9253         VAPictureH264 *va_pic;
9254
9255         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
9256         avc_state->list_ref_idx[1][i] = 0;
9257
9258         if (i >= avc_state->num_refs[1])
9259             continue;
9260
9261         va_pic = &slice_param->RefPicList1[i];
9262
9263         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
9264             obj_surface = encode_state->reference_objects[j];
9265
9266
9267             if (obj_surface &&
9268                 obj_surface->bo &&
9269                 obj_surface->base.id == va_pic->picture_id) {
9270
9271                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
9272                 avc_state->list_ref_idx[1][i] = j;
9273
9274                 break;
9275                 break;
9276             }
9277         }
9278     }
9279
9280     obj_surface = encode_state->reconstructed_object;
9281     fei_param = avc_state->fei_framectl_param;
9282     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
9283
9284     /* Setup current reconstruct frame */
9285     obj_surface = encode_state->reconstructed_object;
9286     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
9287
9288     if (va_status != VA_STATUS_SUCCESS)
9289         return va_status;
9290
9291     memset(&surface_param, 0, sizeof(surface_param));
9292     surface_param.frame_width = generic_state->frame_width_in_pixel;
9293     surface_param.frame_height = generic_state->frame_height_in_pixel;
9294     va_status = gen9_avc_init_check_surfaces(ctx,
9295                                              obj_surface, encoder_context,
9296                                              &surface_param);
9297     avc_priv_surface = obj_surface->private_data;
9298
9299     /* res_mb_code_surface for MB code */
9300     /* PAK only mode must have the mb_code_surface from middleware,
9301      * so the code shouldn't reach here without an externally provided
9302      * MB Code buffer */
9303     assert(fei_param->mb_code_data != VA_INVALID_ID);
9304     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
9305     obj_buffer = BUFFER(fei_param->mb_code_data);
9306     assert(obj_buffer != NULL);
9307     buffer_store = obj_buffer->buffer_store;
9308     assert(size <= buffer_store->bo->size);
9309     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
9310         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
9311     i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
9312                                            buffer_store->bo);
9313     /* res_mv_data_surface for MV data */
9314     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
9315     if (fei_param->mv_data != VA_INVALID_ID) {
9316         obj_buffer = BUFFER(fei_param->mv_data);
9317         assert(obj_buffer != NULL);
9318         buffer_store = obj_buffer->buffer_store;
9319         assert(size <= buffer_store->bo->size);
9320         if (avc_priv_surface->res_mv_data_surface.bo != NULL)
9321             i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
9322         i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
9323                                                buffer_store->bo);
9324     }
9325
9326     return VA_STATUS_SUCCESS;
9327
9328 }
9329
9330 static VAStatus
9331 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
9332                               VAProfile profile,
9333                               struct encode_state *encode_state,
9334                               struct intel_encoder_context *encoder_context)
9335 {
9336     VAStatus va_status;
9337     struct i965_driver_data *i965 = i965_driver_data(ctx);
9338     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9339     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9340     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9341     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9342     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9343
9344     struct object_surface *obj_surface;
9345     VAEncPictureParameterBufferH264  *pic_param;
9346     VAEncSliceParameterBufferH264 *slice_param;
9347
9348     struct gen9_surface_avc *avc_priv_surface;
9349     struct avc_surface_param surface_param;
9350     int i, j, enable_avc_ildb = 0;
9351     unsigned int allocate_flag = 1;
9352     unsigned int size, w_mb, h_mb;
9353
9354     if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
9355         va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
9356         if (va_status != VA_STATUS_SUCCESS)
9357             return va_status;
9358     }
9359
9360     pic_param = avc_state->pic_param;
9361     slice_param = avc_state->slice_param[0];
9362     w_mb = generic_state->frame_width_in_mbs;
9363     h_mb = generic_state->frame_height_in_mbs;
9364
9365     /* update the parameter and check slice parameter */
9366     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
9367         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
9368         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
9369
9370         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
9371             assert((slice_param->slice_type == SLICE_TYPE_I) ||
9372                    (slice_param->slice_type == SLICE_TYPE_SI) ||
9373                    (slice_param->slice_type == SLICE_TYPE_P) ||
9374                    (slice_param->slice_type == SLICE_TYPE_SP) ||
9375                    (slice_param->slice_type == SLICE_TYPE_B));
9376
9377             if (slice_param->disable_deblocking_filter_idc != 1) {
9378                 enable_avc_ildb = 1;
9379                 break;
9380             }
9381
9382             slice_param++;
9383         }
9384     }
9385     avc_state->enable_avc_ildb = enable_avc_ildb;
9386
9387     /* setup the all surface and buffer for PAK */
9388     /* Setup current reconstruct frame */
9389     obj_surface = encode_state->reconstructed_object;
9390     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
9391
9392     if (va_status != VA_STATUS_SUCCESS)
9393         return va_status;
9394
9395     memset(&surface_param, 0, sizeof(surface_param));
9396     surface_param.frame_width = generic_state->frame_width_in_pixel;
9397     surface_param.frame_height = generic_state->frame_height_in_pixel;
9398     va_status = gen9_avc_init_check_surfaces(ctx,
9399                                              obj_surface, encoder_context,
9400                                              &surface_param);
9401     if (va_status != VA_STATUS_SUCCESS)
9402         return va_status;
9403     /* init the member of avc_priv_surface,frame_store_id,qp_value */
9404     {
9405         avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
9406         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
9407         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
9408         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
9409         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
9410         i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
9411         i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
9412         avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
9413         avc_priv_surface->frame_store_id = 0;
9414         avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
9415         avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
9416         avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
9417         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
9418         avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
9419     }
9420     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
9421     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
9422     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
9423     i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
9424
9425
9426     if (avc_state->enable_avc_ildb) {
9427         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
9428     } else {
9429         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
9430     }
9431     /* input YUV surface */
9432     obj_surface = encode_state->input_yuv_object;
9433     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
9434
9435     if (va_status != VA_STATUS_SUCCESS)
9436         return va_status;
9437     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
9438     i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
9439
9440     /* Reference surfaces */
9441     for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
9442         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
9443         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
9444         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
9445         obj_surface = encode_state->reference_objects[i];
9446         avc_state->top_field_poc[2 * i] = 0;
9447         avc_state->top_field_poc[2 * i + 1] = 0;
9448
9449         if (obj_surface && obj_surface->bo) {
9450             i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
9451
9452             /* actually it should be handled when it is reconstructed surface */
9453             va_status = gen9_avc_init_check_surfaces(ctx,
9454                                                      obj_surface, encoder_context,
9455                                                      &surface_param);
9456             if (va_status != VA_STATUS_SUCCESS)
9457                 return va_status;
9458             avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
9459             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
9460             i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
9461             avc_priv_surface->frame_store_id = i;
9462             avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
9463             avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
9464         } else {
9465             break;
9466         }
9467     }
9468
9469     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
9470         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
9471         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
9472     }
9473
9474     avc_ctx->pres_slice_batch_buffer_2nd_level =
9475         intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
9476                               4096 *
9477                               encode_state->num_slice_params_ext);
9478     if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
9479         return VA_STATUS_ERROR_ALLOCATION_FAILED;
9480
9481     for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
9482         avc_state->slice_batch_offset[i] = 0;
9483     }
9484
9485
9486     size = w_mb * 64;
9487     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
9488     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9489                                                &avc_ctx->res_intra_row_store_scratch_buffer,
9490                                                size,
9491                                                "PAK Intra row store scratch buffer");
9492     if (!allocate_flag)
9493         goto failed_allocation;
9494
9495     size = w_mb * 4 * 64;
9496     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
9497     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9498                                                &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
9499                                                size,
9500                                                "PAK Deblocking filter row store scratch buffer");
9501     if (!allocate_flag)
9502         goto failed_allocation;
9503
9504     size = w_mb * 2 * 64;
9505     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
9506     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9507                                                &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
9508                                                size,
9509                                                "PAK BSD/MPC row store scratch buffer");
9510     if (!allocate_flag)
9511         goto failed_allocation;
9512
9513     size = w_mb * h_mb * 16;
9514     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
9515     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9516                                                &avc_ctx->res_pak_mb_status_buffer,
9517                                                size,
9518                                                "PAK MB status buffer");
9519     if (!allocate_flag)
9520         goto failed_allocation;
9521
9522     return VA_STATUS_SUCCESS;
9523
9524 failed_allocation:
9525     return VA_STATUS_ERROR_ALLOCATION_FAILED;
9526 }
9527
/* Run the PAK (bitstream packing) stage for one frame.
 *
 * Prepares all PAK surfaces/buffers, then programs the BSD/MFX ring with
 * one picture-level + slice-level command sequence per PAK pass (multiple
 * passes are used by BRC to re-encode until the size target is met), and
 * finally reads back the MFC status registers for each pass.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Start an atomic batch on the BSD ring (explicitly ring 0 when the
     * hardware exposes a second BSD ring). */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* On the first pass, clear the AVC image status/control register
             * so later passes read a clean state. */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        /* Capture byte count / image status for this pass into the
         * internal status buffer. */
        gen9_avc_read_mfc_status(ctx, encoder_context);
    }

    /* The second-level slice batch is per-frame; drop it now. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
9585
9586 static VAStatus
9587 gen9_avc_pak_pipeline(VADriverContextP ctx,
9588                       VAProfile profile,
9589                       struct encode_state *encode_state,
9590                       struct intel_encoder_context *encoder_context)
9591 {
9592     VAStatus vaStatus;
9593
9594     switch (profile) {
9595     case VAProfileH264ConstrainedBaseline:
9596     case VAProfileH264Main:
9597     case VAProfileH264High:
9598     case VAProfileH264MultiviewHigh:
9599     case VAProfileH264StereoHigh:
9600         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
9601         break;
9602
9603     default:
9604         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
9605         break;
9606     }
9607
9608     return vaStatus;
9609 }
9610
9611 static void
9612 gen9_avc_pak_context_destroy(void * context)
9613 {
9614     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
9615     struct generic_encoder_context * generic_ctx;
9616     struct i965_avc_encoder_context * avc_ctx;
9617     int i = 0;
9618
9619     if (!pak_context)
9620         return;
9621
9622     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9623     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9624
9625     // other things
9626     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
9627     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
9628     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
9629     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
9630
9631     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
9632     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
9633     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
9634     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
9635     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
9636
9637     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
9638         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
9639     }
9640
9641     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
9642         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
9643     }
9644
9645     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
9646         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
9647         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
9648     }
9649
9650 }
9651
9652 static VAStatus
9653 gen9_avc_get_coded_status(VADriverContextP ctx,
9654                           struct intel_encoder_context *encoder_context,
9655                           struct i965_coded_buffer_segment *coded_buf_seg)
9656 {
9657     struct encoder_status *avc_encode_status;
9658
9659     if (!encoder_context || !coded_buf_seg)
9660         return VA_STATUS_ERROR_INVALID_BUFFER;
9661
9662     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
9663     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
9664
9665     return VA_STATUS_SUCCESS;
9666 }
9667
9668 Bool
9669 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9670 {
9671     /* VME & PAK share the same context */
9672     struct i965_driver_data *i965 = i965_driver_data(ctx);
9673     struct encoder_vme_mfc_context * vme_context = NULL;
9674     struct generic_encoder_context * generic_ctx = NULL;
9675     struct i965_avc_encoder_context * avc_ctx = NULL;
9676     struct generic_enc_codec_state * generic_state = NULL;
9677     struct avc_enc_state * avc_state = NULL;
9678     struct encoder_status_buffer_internal *status_buffer;
9679     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
9680
9681     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
9682     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
9683     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
9684     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
9685     avc_state = calloc(1, sizeof(struct avc_enc_state));
9686
9687     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
9688         goto allocate_structure_failed;
9689
9690     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
9691     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
9692     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
9693     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
9694     memset(avc_state, 0, sizeof(struct avc_enc_state));
9695
9696     encoder_context->vme_context = vme_context;
9697     vme_context->generic_enc_ctx = generic_ctx;
9698     vme_context->private_enc_ctx = avc_ctx;
9699     vme_context->generic_enc_state = generic_state;
9700     vme_context->private_enc_state = avc_state;
9701
9702     if (IS_SKL(i965->intel.device_info) ||
9703         IS_BXT(i965->intel.device_info)) {
9704         if (!encoder_context->fei_enabled) {
9705             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
9706             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
9707         } else {
9708             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
9709             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
9710         }
9711     } else if (IS_GEN8(i965->intel.device_info)) {
9712         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
9713         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
9714     } else if (IS_KBL(i965->intel.device_info) ||
9715                IS_GLK(i965->intel.device_info)) {
9716         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
9717         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
9718     } else
9719         goto allocate_structure_failed;
9720
9721     /* initialize misc ? */
9722     avc_ctx->ctx = ctx;
9723     generic_ctx->use_hw_scoreboard = 1;
9724     generic_ctx->use_hw_non_stalling_scoreboard = 1;
9725
9726     /* initialize generic state */
9727
9728     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
9729     generic_state->preset = INTEL_PRESET_RT_SPEED;
9730     generic_state->seq_frame_number = 0;
9731     generic_state->total_frame_number = 0;
9732     generic_state->frame_type = 0;
9733     generic_state->first_frame = 1;
9734
9735     generic_state->frame_width_in_pixel = 0;
9736     generic_state->frame_height_in_pixel = 0;
9737     generic_state->frame_width_in_mbs = 0;
9738     generic_state->frame_height_in_mbs = 0;
9739     generic_state->frame_width_4x = 0;
9740     generic_state->frame_height_4x = 0;
9741     generic_state->frame_width_16x = 0;
9742     generic_state->frame_height_16x = 0;
9743     generic_state->frame_width_32x = 0;
9744     generic_state->downscaled_width_4x_in_mb = 0;
9745     generic_state->downscaled_height_4x_in_mb = 0;
9746     generic_state->downscaled_width_16x_in_mb = 0;
9747     generic_state->downscaled_height_16x_in_mb = 0;
9748     generic_state->downscaled_width_32x_in_mb = 0;
9749     generic_state->downscaled_height_32x_in_mb = 0;
9750
9751     if (!encoder_context->fei_enabled) {
9752         generic_state->hme_supported = 1;
9753         generic_state->b16xme_supported = 1;
9754     }
9755     generic_state->b16xme_supported = 1;
9756     generic_state->b32xme_supported = 0;
9757     generic_state->hme_enabled = 0;
9758     generic_state->b16xme_enabled = 0;
9759     generic_state->b32xme_enabled = 0;
9760     generic_state->brc_distortion_buffer_supported = 1;
9761     generic_state->brc_constant_buffer_supported = 0;
9762
9763
9764     generic_state->frame_rate = 30;
9765     generic_state->brc_allocated = 0;
9766     generic_state->brc_inited = 0;
9767     generic_state->brc_need_reset = 0;
9768     generic_state->is_low_delay = 0;
9769     generic_state->brc_enabled = 0;//default
9770     generic_state->internal_rate_mode = 0;
9771     generic_state->curr_pak_pass = 0;
9772     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
9773     generic_state->is_first_pass = 1;
9774     generic_state->is_last_pass = 0;
9775     generic_state->mb_brc_enabled = 0; // enable mb brc
9776     generic_state->brc_roi_enable = 0;
9777     generic_state->brc_dirty_roi_enable = 0;
9778     generic_state->skip_frame_enbale = 0;
9779
9780     generic_state->target_bit_rate = 0;
9781     generic_state->max_bit_rate = 0;
9782     generic_state->min_bit_rate = 0;
9783     generic_state->init_vbv_buffer_fullness_in_bit = 0;
9784     generic_state->vbv_buffer_size_in_bit = 0;
9785     generic_state->frames_per_100s = 0;
9786     generic_state->gop_size = 0;
9787     generic_state->gop_ref_distance = 0;
9788     generic_state->brc_target_size = 0;
9789     generic_state->brc_mode = 0;
9790     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
9791     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
9792     generic_state->brc_init_reset_buf_size_in_bits = 0;
9793     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
9794     generic_state->frames_per_window_size = 0;//default
9795     generic_state->target_percentage = 0;
9796
9797     generic_state->avbr_curracy = 0;
9798     generic_state->avbr_convergence = 0;
9799
9800     generic_state->num_skip_frames = 0;
9801     generic_state->size_skip_frames = 0;
9802
9803     generic_state->num_roi = 0;
9804     generic_state->max_delta_qp = 0;
9805     generic_state->min_delta_qp = 0;
9806
9807     if (encoder_context->rate_control_mode != VA_RC_NONE &&
9808         encoder_context->rate_control_mode != VA_RC_CQP) {
9809         generic_state->brc_enabled = 1;
9810         generic_state->brc_distortion_buffer_supported = 1;
9811         generic_state->brc_constant_buffer_supported = 1;
9812         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
9813     }
9814     /*avc state initialization */
9815     avc_state->mad_enable = 0;
9816     avc_state->mb_disable_skip_map_enable = 0;
9817     avc_state->sfd_enable = 1;//default
9818     avc_state->sfd_mb_enable = 1;//set it true
9819     avc_state->adaptive_search_window_enable = 1;//default
9820     avc_state->mb_qp_data_enable = 0;
9821     avc_state->intra_refresh_i_enable = 0;
9822     avc_state->min_max_qp_enable = 0;
9823     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
9824
9825     //external input
9826     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
9827     avc_state->ftq_skip_threshold_lut_input_enable = 0;
9828     avc_state->ftq_override = 0;
9829
9830     avc_state->direct_bias_adjustment_enable = 0;
9831     avc_state->global_motion_bias_adjustment_enable = 0;
9832     avc_state->disable_sub_mb_partion = 0;
9833     avc_state->arbitrary_num_mbs_in_slice = 0;
9834     avc_state->adaptive_transform_decision_enable = 0;//default
9835     avc_state->skip_check_disable = 0;
9836     avc_state->tq_enable = 0;
9837     avc_state->enable_avc_ildb = 0;
9838     avc_state->mbaff_flag = 0;
9839     avc_state->enable_force_skip = 1;//default
9840     avc_state->rc_panic_enable = 1;//default
9841     avc_state->suppress_recon_enable = 1;//default
9842
9843     avc_state->ref_pic_select_list_supported = 1;
9844     avc_state->mb_brc_supported = 1;//?,default
9845     avc_state->multi_pre_enable = 1;//default
9846     avc_state->ftq_enable = 1;//default
9847     avc_state->caf_supported = 1; //default
9848     avc_state->caf_enable = 0;
9849     avc_state->caf_disable_hd = 1;//default
9850     avc_state->skip_bias_adjustment_supported = 1;//default
9851
9852     avc_state->adaptive_intra_scaling_enable = 1;//default
9853     avc_state->old_mode_cost_enable = 0;//default
9854     avc_state->multi_ref_qp_enable = 1;//default
9855     avc_state->weighted_ref_l0_enable = 1;//default
9856     avc_state->weighted_ref_l1_enable = 1;//default
9857     avc_state->weighted_prediction_supported = 0;
9858     avc_state->brc_split_enable = 0;
9859     avc_state->slice_level_report_supported = 0;
9860
9861     avc_state->fbr_bypass_enable = 1;//default
9862     avc_state->field_scaling_output_interleaved = 0;
9863     avc_state->mb_variance_output_enable = 0;
9864     avc_state->mb_pixel_average_output_enable = 0;
9865     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
9866     avc_state->mbenc_curbe_set_in_brc_update = 0;
9867     avc_state->rounding_inter_enable = 1; //default
9868     avc_state->adaptive_rounding_inter_enable = 1;//default
9869
9870     avc_state->mbenc_i_frame_dist_in_use = 0;
9871     avc_state->mb_status_supported = 1; //set in intialization for gen9
9872     avc_state->mb_status_enable = 0;
9873     avc_state->mb_vproc_stats_enable = 0;
9874     avc_state->flatness_check_enable = 0;
9875     avc_state->flatness_check_supported = 1;//default
9876     avc_state->block_based_skip_enable = 0;
9877     avc_state->use_widi_mbenc_kernel = 0;
9878     avc_state->kernel_trellis_enable = 0;
9879     avc_state->generic_reserved = 0;
9880
9881     avc_state->rounding_value = 0;
9882     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
9883     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
9884     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
9885     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
9886     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
9887     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
9888     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
9889     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
9890     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
9891
9892     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
9893     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
9894     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
9895
9896     avc_state->intra_refresh_qp_threshold = 0;
9897     avc_state->trellis_flag = 0;
9898     avc_state->hme_mv_cost_scaling_factor = 0;
9899     avc_state->slice_height = 1;
9900     avc_state->slice_num = 1;
9901     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
9902     avc_state->bi_weight = 0;
9903
9904     avc_state->lambda_table_enable = 0;
9905
9906     if (IS_GEN8(i965->intel.device_info)) {
9907         avc_state->brc_const_data_surface_width = 64;
9908         avc_state->brc_const_data_surface_height = 44;
9909         avc_state->mb_status_supported = 0;
9910     } else if (IS_SKL(i965->intel.device_info) ||
9911                IS_BXT(i965->intel.device_info)) {
9912         avc_state->brc_const_data_surface_width = 64;
9913         avc_state->brc_const_data_surface_height = 44;
9914         avc_state->brc_split_enable = 1;
9915     } else if (IS_KBL(i965->intel.device_info) ||
9916                IS_GLK(i965->intel.device_info)) {
9917         avc_state->brc_const_data_surface_width = 64;
9918         avc_state->brc_const_data_surface_height = 53;
9919         //gen95
9920         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
9921         avc_state->extended_mv_cost_range_enable = 0;
9922         avc_state->reserved_g95 = 0;
9923         avc_state->mbenc_brc_buffer_size = 128;
9924         avc_state->kernel_trellis_enable = 1;
9925         avc_state->lambda_table_enable = 1;
9926         avc_state->brc_split_enable = 1;
9927     }
9928
9929     avc_state->num_refs[0] = 0;
9930     avc_state->num_refs[1] = 0;
9931     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
9932     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
9933     avc_state->tq_rounding = 0;
9934     avc_state->zero_mv_threshold = 0;
9935     avc_state->slice_second_levle_batch_buffer_in_use = 0;
9936
9937     //1. seq/pic/slice
9938
9939     /* the definition of status buffer offset for Encoder */
9940
9941     status_buffer = &avc_ctx->status_buffer;
9942     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
9943
9944     status_buffer->base_offset = base_offset;
9945     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
9946     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
9947     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
9948     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
9949     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
9950     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
9951
9952     status_buffer->status_buffer_size = sizeof(struct encoder_status);
9953     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
9954     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
9955     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
9956     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
9957     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
9958
9959     if (IS_GEN8(i965->intel.device_info)) {
9960         gen8_avc_kernel_init(ctx, encoder_context);
9961     } else {
9962         gen9_avc_kernel_init(ctx, encoder_context);
9963     }
9964     encoder_context->vme_context = vme_context;
9965     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
9966     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
9967
9968     return true;
9969
9970 allocate_structure_failed:
9971
9972     free(vme_context);
9973     free(generic_ctx);
9974     free(avc_ctx);
9975     free(generic_state);
9976     free(avc_state);
9977     return false;
9978 }
9979
9980 Bool
9981 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9982 {
9983     /* VME & PAK share the same context */
9984     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9985
9986     if (!pak_context)
9987         return false;
9988
9989     encoder_context->mfc_context = pak_context;
9990     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
9991     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
9992     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
9993     encoder_context->get_status = gen9_avc_get_coded_status;
9994     return true;
9995 }