OSDN Git Service

gen9_avc_encoder: brc_curbe_size is not static anymore
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
1 /*
2  * Copyright @ 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
46
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
53
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1
/* Base index into the MBENC kernel table; other MBENC kernels are offsets from it. */
#define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* log2 of the alignment: 16 = 1 << 4 */
58
/*
 * Emit a 2-dword (64-bit) buffer address into the BCS batch: a relocation
 * for @bo at offset @delta when the buffer exists, or two zero dwords
 * otherwise.  @is_target selects the render-domain write flag on the
 * relocation (set for buffers the GPU writes).
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                        \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/* OUT_BUFFER_2DW plus a third dword carrying the surface attribute bits. */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
76
/* Flat quantization matrix: every byte entry is 0x10 (16). */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};

/* Flat forward-quantization matrix: every 16-bit entry is 0x1000. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};

/* NOTE(review): appears to map a slice type index to a kernel index —
 * confirm the ordering against the callers that index this table. */
static const unsigned int slice_type_kernel[3] = {1, 2, 0};
96
/*
 * Default CURBE payload for the BRC init/reset kernel.  One brace group per
 * 32-bit dword of the curbe; most fields start at zero and are filled in at
 * runtime before the kernel is dispatched.  The non-zero entries (dwords
 * 11-15) are fixed tuning constants shipped with the kernel.
 */
static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        0
    },

    // unsigned int 4
    {
        0
    },

    // unsigned int 5
    {
        0
    },

    // unsigned int 6
    {
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        0,
        0
    },

    // unsigned int 9
    {
        0,
        0
    },

    // unsigned int 10
    {
        0,
        0
    },

    // unsigned int 11
    {
        0,
        1
    },

    // unsigned int 12
    {
        51,
        0
    },

    // unsigned int 13
    {
        40,
        60,
        80,
        120
    },

    // unsigned int 14
    {
        35,
        60,
        80,
        120
    },

    // unsigned int 15
    {
        40,
        60,
        90,
        115
    },

    // unsigned int 16
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 17
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 18
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 19
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 20
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 21
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 22
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 23
    {
        0
    }
};
253
/*
 * Default CURBE payload for the frame-level BRC update kernel.  One brace
 * group per 32-bit dword; the non-zero entries (dwords 3-4 and 8-14) are
 * fixed tuning thresholds/deltas shipped with the kernel, the rest are
 * populated at runtime before dispatch.
 */
static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        10,
        50
    },

    // unsigned int 4
    {
        100,
        150
    },

    // unsigned int 5
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 6
    {
        0,
        0,
        0,
        0,
        0,
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        1,
        1,
        3,
        2
    },

    // unsigned int 9
    {
        1,
        40,
        5,
        5
    },

    // unsigned int 10
    {
        3,
        1,
        7,
        18
    },

    // unsigned int 11
    {
        25,
        37,
        40,
        75
    },

    // unsigned int 12
    {
        97,
        103,
        125,
        160
    },

    // unsigned int 13
    {
        -3,
        -2,
        -1,
        0
    },

    // unsigned int 14
    {
        1,
        2,
        3,
        0xff
    },

    // unsigned int 15
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 16
    {
        0
    },

    // unsigned int 17
    {
        0
    },

    // unsigned int 18
    {
        0
    },

    // unsigned int 19
    {
        0
    },

    // unsigned int 20
    {
        0
    },

    // unsigned int 21
    {
        0
    },

    // unsigned int 22
    {
        0
    },

    // unsigned int 23
    {
        0
    },

};
410
411 static void
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413                                 struct encode_state *encode_state,
414                                 struct intel_encoder_context *encoder_context)
415 {
416     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
418     int i;
419
420     /* brc */
421     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
422
423     generic_state->brc_need_reset = encoder_context->brc.need_reset;
424
425     if (generic_state->internal_rate_mode == VA_RC_CBR) {
426         generic_state->min_bit_rate = generic_state->max_bit_rate;
427         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
428
429         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430             generic_state->target_bit_rate = generic_state->max_bit_rate;
431             generic_state->brc_need_reset = 1;
432         }
433     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
434         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
436
437         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439             generic_state->brc_need_reset = 1;
440         }
441     }
442
443     /*  frame rate */
444     if (generic_state->internal_rate_mode != VA_RC_CQP) {
445         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
448     } else {
449         generic_state->frames_per_100s = 30 * 100;
450         generic_state->frame_rate = 30 ;
451         generic_state->frames_per_window_size = 30;
452     }
453
454     /*  HRD */
455     if (generic_state->internal_rate_mode != VA_RC_CQP) {
456         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
458     }
459
460     /* ROI */
461     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462     if (generic_state->num_roi > 0) {
463         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
465
466         for (i = 0; i < generic_state->num_roi; i++) {
467             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
468             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
469             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
470             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
472
473             generic_state->roi[i].left /= 16;
474             generic_state->roi[i].right /= 16;
475             generic_state->roi[i].top /= 16;
476             generic_state->roi[i].bottom /= 16;
477         }
478     }
479
480 }
481
482 static bool
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
484                                      int binary_size,
485                                      INTEL_GENERIC_ENC_OPERATION operation,
486                                      int krnstate_idx,
487                                      struct i965_kernel *ret_kernel)
488 {
489     typedef uint32_t BIN_PTR[4];
490
491     char *bin_start;
492     gen9_avc_encoder_kernel_header      *pkh_table;
493     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
494     int next_krnoffset;
495
496     if (!pvbinary || !ret_kernel)
497         return false;
498
499     bin_start = (char *)pvbinary;
500     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
501     pinvalid_entry = &(pkh_table->static_detection) + 1;
502     next_krnoffset = binary_size;
503
504     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505         pcurr_header = &pkh_table->ply_dscale_ply;
506     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507         pcurr_header = &pkh_table->ply_2xdscale_ply;
508     } else if (operation == INTEL_GENERIC_ENC_ME) {
509         pcurr_header = &pkh_table->me_p;
510     } else if (operation == INTEL_GENERIC_ENC_BRC) {
511         pcurr_header = &pkh_table->frame_brc_init;
512     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513         pcurr_header = &pkh_table->mbenc_quality_I;
514     } else if (operation == INTEL_GENERIC_ENC_WP) {
515         pcurr_header = &pkh_table->wp;
516     } else if (operation == INTEL_GENERIC_ENC_SFD) {
517         pcurr_header = &pkh_table->static_detection;
518     } else {
519         return false;
520     }
521
522     pcurr_header += krnstate_idx;
523     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
524
525     pnext_header = (pcurr_header + 1);
526     if (pnext_header < pinvalid_entry) {
527         next_krnoffset = pnext_header->kernel_start_pointer << 6;
528     }
529     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
530
531     return true;
532 }
533 static void
534 gen9_free_surfaces_avc(void **data)
535 {
536     struct gen9_surface_avc *avc_surface;
537
538     if (!data || !*data)
539         return;
540
541     avc_surface = *data;
542
543     if (avc_surface->scaled_4x_surface_obj) {
544         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546         avc_surface->scaled_4x_surface_obj = NULL;
547     }
548
549     if (avc_surface->scaled_16x_surface_obj) {
550         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552         avc_surface->scaled_16x_surface_obj = NULL;
553     }
554
555     if (avc_surface->scaled_32x_surface_obj) {
556         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558         avc_surface->scaled_32x_surface_obj = NULL;
559     }
560
561     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
564
565     dri_bo_unreference(avc_surface->dmv_top);
566     avc_surface->dmv_top = NULL;
567     dri_bo_unreference(avc_surface->dmv_bottom);
568     avc_surface->dmv_bottom = NULL;
569
570     free(avc_surface);
571
572     *data = NULL;
573
574     return;
575 }
576
/*
 * Lazily allocate the AVC encoder's per-surface private data: the 4x and 16x
 * (and, when 32x ME is supported or enabled, 32x) downscaled NV12 surfaces,
 * the per-frame MB-code and MV-data buffers, the optional ref-pic-select
 * list, and the direct-MV top/bottom buffers.
 *
 * Returns VA_STATUS_SUCCESS immediately when the surface already has
 * private data.  On failure the partially-built private data remains
 * attached to the surface and is released later through
 * obj_surface->free_private_data (gen9_free_surfaces_avc).
 */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    /* frame dimensions in 16x16 macroblock units */
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* already initialized by a previous frame */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Attach to the surface up front so any failure below is still cleaned
     * up via free_private_data when the surface is destroyed. */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    /* 4x downscaled surface for HME */
    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 16x downscaled surface for HME */
    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 32x downscaled surface, only when 32x ME can be used */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /*mb code and mv data for each frame*/
    size = frame_mb_nums * 16 * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mb_code_surface,
                                               ALIGN(size, 0x1000),
                                               "mb code buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = frame_mb_nums * 32 * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mv_data_surface,
                                               ALIGN(size, 0x1000),
                                               "mv data buffer");
    if (!allocate_flag)
        goto failed_allocation;

    /* ref pic list*/
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /*direct mv*/
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
720
/*
 * Fill the MBENC slice-map surface: one 32-bit slice index per macroblock,
 * written row by row at a fixed dword pitch, terminated with a 0xFFFFFFFF
 * sentinel.  Only needed when slices contain an arbitrary number of
 * macroblocks (otherwise the kernel derives the mapping itself).
 */
static void
gen9_avc_generate_slice_map(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    VAEncSliceParameterBufferH264 * slice_param = NULL;
    unsigned int * data = NULL;
    unsigned int * data_row = NULL;
    int i, j, count = 0;
    /* row pitch of the map in dwords: (width + 1) entries, 64-byte aligned */
    unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;

    if (!avc_state->arbitrary_num_mbs_in_slice)
        return;

    gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
    assert(data_row);

    data = data_row;
    /* walk every slice and stamp its index on each of its macroblocks */
    for (i = 0; i < avc_state->slice_num; i++) {
        slice_param = avc_state->slice_param[i];
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            *data++ = i;
            /* crossed a frame-width boundary: advance to the next map row
             * and rewrite this MB's entry at the new row start */
            if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
                data_row += pitch;
                data = data_row;
                *data++ = i;
            }
            count++;
        }
    }
    /* end-of-map sentinel */
    *data++ = 0xFFFFFFFF;

    i965_unmap_gpe_resource(gpe_resource);
}
766
767 static VAStatus
768 gen9_avc_allocate_resources(VADriverContextP ctx,
769                             struct encode_state *encode_state,
770                             struct intel_encoder_context *encoder_context)
771 {
772     struct i965_driver_data *i965 = i965_driver_data(ctx);
773     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
774     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
775     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
777     unsigned int size  = 0;
778     unsigned int width  = 0;
779     unsigned int height  = 0;
780     unsigned char * data  = NULL;
781     int allocate_flag = 1;
782     int i = 0;
783
784     /*all the surface/buffer are allocated here*/
785
786     /*second level batch buffer for image state write when cqp etc*/
787     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
788     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
789     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
790                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
791                                                ALIGN(size, 0x1000),
792                                                "second levle batch (image state write) buffer");
793     if (!allocate_flag)
794         goto failed_allocation;
795
796     /* scaling related surface   */
797     if (avc_state->mb_status_supported) {
798         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
799         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
800         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
801                                                    &avc_ctx->res_mb_status_buffer,
802                                                    ALIGN(size, 0x1000),
803                                                    "MB statistics output buffer");
804         if (!allocate_flag)
805             goto failed_allocation;
806         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
807     }
808
809     if (avc_state->flatness_check_supported) {
810         width = generic_state->frame_width_in_mbs * 4;
811         height = generic_state->frame_height_in_mbs * 4;
812         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
813         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
814                                                       &avc_ctx->res_flatness_check_surface,
815                                                       width, height,
816                                                       ALIGN(width, 64),
817                                                       "Flatness check buffer");
818         if (!allocate_flag)
819             goto failed_allocation;
820     }
821     /* me related surface */
822     width = generic_state->downscaled_width_4x_in_mb * 8;
823     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
824     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
825     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
826                                                   &avc_ctx->s4x_memv_distortion_buffer,
827                                                   width, height,
828                                                   ALIGN(width, 64),
829                                                   "4x MEMV distortion buffer");
830     if (!allocate_flag)
831         goto failed_allocation;
832     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
833
834     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
835     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
836     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
837     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
838                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
839                                                   width, height,
840                                                   width,
841                                                   "4x MEMV min distortion brc buffer");
842     if (!allocate_flag)
843         goto failed_allocation;
844     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
845
846
847     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
848     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
849     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
850     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
851                                                   &avc_ctx->s4x_memv_data_buffer,
852                                                   width, height,
853                                                   width,
854                                                   "4x MEMV data buffer");
855     if (!allocate_flag)
856         goto failed_allocation;
857     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
858
859
860     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
861     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
862     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
863     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
864                                                   &avc_ctx->s16x_memv_data_buffer,
865                                                   width, height,
866                                                   width,
867                                                   "16x MEMV data buffer");
868     if (!allocate_flag)
869         goto failed_allocation;
870     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
871
872
873     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
874     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
875     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
876     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
877                                                   &avc_ctx->s32x_memv_data_buffer,
878                                                   width, height,
879                                                   width,
880                                                   "32x MEMV data buffer");
881     if (!allocate_flag)
882         goto failed_allocation;
883     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
884
885
886     if (!generic_state->brc_allocated) {
887         /*brc related surface */
888         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
889         size = 864;
890         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891                                                    &avc_ctx->res_brc_history_buffer,
892                                                    ALIGN(size, 0x1000),
893                                                    "brc history buffer");
894         if (!allocate_flag)
895             goto failed_allocation;
896
897         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
898         size = 64;//44
899         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
900                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
901                                                    ALIGN(size, 0x1000),
902                                                    "brc pak statistic buffer");
903         if (!allocate_flag)
904             goto failed_allocation;
905
906         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
907         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
908         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
909                                                    &avc_ctx->res_brc_image_state_read_buffer,
910                                                    ALIGN(size, 0x1000),
911                                                    "brc image state read buffer");
912         if (!allocate_flag)
913             goto failed_allocation;
914
915         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
916         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
917         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
918                                                    &avc_ctx->res_brc_image_state_write_buffer,
919                                                    ALIGN(size, 0x1000),
920                                                    "brc image state write buffer");
921         if (!allocate_flag)
922             goto failed_allocation;
923
924         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
925         height = avc_state->brc_const_data_surface_height;
926         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
927         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
928                                                       &avc_ctx->res_brc_const_data_buffer,
929                                                       width, height,
930                                                       width,
931                                                       "brc const data buffer");
932         if (!allocate_flag)
933             goto failed_allocation;
934
935         if (generic_state->brc_distortion_buffer_supported) {
936             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
937             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
938             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
939             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
940             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
941             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
942                                                           &avc_ctx->res_brc_dist_data_surface,
943                                                           width, height,
944                                                           width,
945                                                           "brc dist data buffer");
946             if (!allocate_flag)
947                 goto failed_allocation;
948             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
949         }
950
951         if (generic_state->brc_roi_enable) {
952             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
953             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
954             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
955             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
956                                                           &avc_ctx->res_mbbrc_roi_surface,
957                                                           width, height,
958                                                           width,
959                                                           "mbbrc roi buffer");
960             if (!allocate_flag)
961                 goto failed_allocation;
962             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
963         }
964
965         /*mb qp in mb brc*/
966         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
967         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
968         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
969         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
970                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
971                                                       width, height,
972                                                       width,
973                                                       "mbbrc mb qp buffer");
974         if (!allocate_flag)
975             goto failed_allocation;
976
977         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
978         size = 16 * AVC_QP_MAX * 4;
979         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
980                                                    &avc_ctx->res_mbbrc_const_data_buffer,
981                                                    ALIGN(size, 0x1000),
982                                                    "mbbrc const data buffer");
983         if (!allocate_flag)
984             goto failed_allocation;
985
986         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
987             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
988             size = avc_state->mbenc_brc_buffer_size;
989             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
990                                                        &avc_ctx->res_mbenc_brc_buffer,
991                                                        ALIGN(size, 0x1000),
992                                                        "mbenc brc buffer");
993             if (!allocate_flag)
994                 goto failed_allocation;
995             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
996         }
997         generic_state->brc_allocated = 1;
998     }
999
1000     /*mb qp external*/
1001     if (avc_state->mb_qp_data_enable) {
1002         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1003         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1004         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1005         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1006                                                       &avc_ctx->res_mb_qp_data_surface,
1007                                                       width, height,
1008                                                       width,
1009                                                       "external mb qp buffer");
1010         if (!allocate_flag)
1011             goto failed_allocation;
1012     }
1013
1014     /*     mbenc related surface. it share most of surface with other kernels     */
1015     if (avc_state->arbitrary_num_mbs_in_slice) {
1016         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1017         height = generic_state->frame_height_in_mbs ;
1018         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1019         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1020                                                       &avc_ctx->res_mbenc_slice_map_surface,
1021                                                       width, height,
1022                                                       width,
1023                                                       "slice map buffer");
1024         if (!allocate_flag)
1025             goto failed_allocation;
1026         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1027
1028         /*generate slice map,default one slice per frame.*/
1029     }
1030
1031     /* sfd related surface  */
1032     if (avc_state->sfd_enable) {
1033         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1034         size = 128;
1035         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1036                                                    &avc_ctx->res_sfd_output_buffer,
1037                                                    size,
1038                                                    "sfd output buffer");
1039         if (!allocate_flag)
1040             goto failed_allocation;
1041
1042         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1043         size = ALIGN(52, 64);
1044         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1045                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1046                                                    size,
1047                                                    "sfd P frame cost table buffer");
1048         if (!allocate_flag)
1049             goto failed_allocation;
1050         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1051         assert(data);
1052         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1053         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1054
1055         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1056         size = ALIGN(52, 64);
1057         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1058                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1059                                                    size,
1060                                                    "sfd B frame cost table buffer");
1061         if (!allocate_flag)
1062             goto failed_allocation;
1063         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1064         assert(data);
1065         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1066         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1067     }
1068
1069     /* wp related surfaces */
1070     if (avc_state->weighted_prediction_supported) {
1071         for (i = 0; i < 2 ; i++) {
1072             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1073                 continue;
1074             }
1075
1076             width = generic_state->frame_width_in_pixel;
1077             height = generic_state->frame_height_in_pixel ;
1078             i965_CreateSurfaces(ctx,
1079                                 width,
1080                                 height,
1081                                 VA_RT_FORMAT_YUV420,
1082                                 1,
1083                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1084             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1085
1086             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1087                 goto failed_allocation;
1088             }
1089
1090             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1091                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1092         }
1093         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1094         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1095         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1096         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1097     }
1098
1099     /* other   */
1100
1101     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1102     size = 4 * 1;
1103     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1104                                                &avc_ctx->res_mad_data_buffer,
1105                                                ALIGN(size, 0x1000),
1106                                                "MAD data buffer");
1107     if (!allocate_flag)
1108         goto failed_allocation;
1109
1110     return VA_STATUS_SUCCESS;
1111
1112 failed_allocation:
1113     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1114 }
1115
1116 static void
1117 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1118 {
1119     if (!vme_context)
1120         return;
1121
1122     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1123     VADriverContextP ctx = avc_ctx->ctx;
1124     int i = 0;
1125
1126     /* free all the surface/buffer here*/
1127     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1128     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1129     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1130     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1131     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1132     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1133     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1134     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1135     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1136     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1137     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1138     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1139     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1141     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1142     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1143     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1144     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1145     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1146     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1147     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1148     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1149     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1150     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1151     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1152     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1153
1154     for (i = 0; i < 2 ; i++) {
1155         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1156             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1157             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1158             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1159         }
1160     }
1161
1162 }
1163
/*
 * Build and submit one batch that runs a single kernel invocation via a
 * MEDIA_OBJECT command.
 *
 * Sequence: MI_FLUSH, then MI_STORE_DATA_IMM writing the media_function id
 * into the status buffer at media_index_offset (so the status-reporting
 * path can tell which kernel stage was submitted last), then the GPE
 * pipeline setup / MEDIA_OBJECT / media state flush / pipeline end
 * sequence, built atomically and flushed to the hardware at the end.
 */
static void
gen9_avc_run_kernel_media_object(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 struct i965_gpe_context *gpe_context,
                                 int media_function,
                                 struct gpe_media_object_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    /* Nothing to submit without a batch buffer. */
    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* Tag the status buffer with the current media function id before the
     * kernel runs. */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1203
/*
 * Build and submit one batch that runs a kernel over a 2D region via a
 * MEDIA_OBJECT_WALKER command (one hardware thread per walker unit, e.g.
 * per macroblock), instead of a single MEDIA_OBJECT.
 *
 * Mirrors gen9_avc_run_kernel_media_object(): MI_FLUSH, status-buffer tag
 * via MI_STORE_DATA_IMM, pipeline setup, walker dispatch, media state
 * flush, pipeline end, then flush the batch.
 */
static void
gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        struct i965_gpe_context *gpe_context,
                                        int media_function,
                                        struct gpe_media_object_walker_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    /* Nothing to submit without a batch buffer. */
    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* Record which kernel stage this submission belongs to. */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object_walker(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1244
/*
 * Initialize a GPE context for an AVC encoder kernel: CURBE size, sampler,
 * interface descriptor table, binding table layout, and VFE (thread/URB)
 * configuration, all derived from the per-kernel parameters.
 */
static void
gen9_init_gpe_context_avc(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct encoder_kernel_parameter *kernel_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    /* Only kernels that use a sampler (e.g. scaling via AVS) request a
     * non-zero sampler_size. */
    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;
    }

    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* Binding table first, then the (padded) surface states right after it. */
    gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* Scale the thread count with the actual EU count when known; fall back
     * to a conservative fixed value otherwise. */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
    else
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    /* URB budget: whatever is left after CURBE and interface descriptors is
     * split into inline-data-sized entries (all sizes in 32-byte registers). */
    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    /* NOTE(review): this relies on the driver-local CLAMP macro taking
     * (min, max, value) argument order — verify against i965_drv_video.h. */
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
}
1284
1285 static void
1286 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1287                              struct encoder_scoreboard_parameter *scoreboard_param)
1288 {
1289     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1290     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1291     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1292
1293     if (scoreboard_param->walkpat_flag) {
1294         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1295         gpe_context->vfe_desc5.scoreboard0.type = 1;
1296
1297         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1298         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1299
1300         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1301         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1302
1303         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1304         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1305
1306         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1307         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1308     } else {
1309         // Scoreboard 0
1310         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1311         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1312
1313         // Scoreboard 1
1314         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1315         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1316
1317         // Scoreboard 2
1318         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1319         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1320
1321         // Scoreboard 3
1322         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1323         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1324
1325         // Scoreboard 4
1326         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1327         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1328
1329         // Scoreboard 5
1330         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1331         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1332
1333         // Scoreboard 6
1334         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1335         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1336
1337         // Scoreboard 7
1338         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1339         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1340     }
1341 }
1342 /*
1343 VME pipeline related function
1344 */
1345
1346 /*
1347 scaling kernel related function
1348 */
1349 static void
1350 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1351                              struct encode_state *encode_state,
1352                              struct i965_gpe_context *gpe_context,
1353                              struct intel_encoder_context *encoder_context,
1354                              void *param)
1355 {
1356     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1357     struct scaling_param *surface_param = (struct scaling_param *)param;
1358
1359     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1360
1361     if (!curbe_cmd)
1362         return;
1363
1364     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1365
1366     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1367     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1368
1369     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1370     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1371
1372
1373     curbe_cmd->dw5.flatness_threshold = 128;
1374     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1375     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1376     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1377
1378     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1379         curbe_cmd->dw7.enable_mb_variance_output ||
1380         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1381         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1382     }
1383
1384     i965_gpe_context_unmap_curbe(gpe_context);
1385     return;
1386 }
1387
/*
 * Fill the CURBE for the Gen9.5 (KBL+) 4x downscaling kernel.  Same role as
 * gen9_avc_set_curbe_scaling4x() but with the Gen9.5 CURBE layout: the
 * enable flags all live in dw6, the stats BTI moves to dw8, and an extra
 * 8x8-block statistics output is available.
 */
static void
gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              void *param)
{
    gen95_avc_scaling4x_curbe_data *curbe_cmd;
    struct scaling_param *surface_param = (struct scaling_param *)param;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!curbe_cmd)
        return;

    memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));

    curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
    curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;

    curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
    curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;

    /* Unlike the Gen9 variant, the flatness threshold is only programmed
     * when the flatness check is actually enabled. */
    if (surface_param->enable_mb_flatness_check)
        curbe_cmd->dw5.flatness_threshold = 128;
    curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
    curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
    curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
    curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;

    /* Bind the statistics surface if any per-MB output is requested. */
    if (curbe_cmd->dw6.enable_mb_flatness_check ||
        curbe_cmd->dw6.enable_mb_variance_output ||
        curbe_cmd->dw6.enable_mb_pixel_average_output) {
        curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
    }

    i965_gpe_context_unmap_curbe(gpe_context);
    return;
}
1427
1428 static void
1429 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1430                              struct encode_state *encode_state,
1431                              struct i965_gpe_context *gpe_context,
1432                              struct intel_encoder_context *encoder_context,
1433                              void *param)
1434 {
1435     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1436     struct scaling_param *surface_param = (struct scaling_param *)param;
1437
1438     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1439
1440     if (!curbe_cmd)
1441         return;
1442
1443     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1444
1445     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1446     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1447
1448     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1449     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1450
1451     i965_gpe_context_unmap_curbe(gpe_context);
1452     return;
1453 }
1454
/*
 * Bind the surfaces used by a scaling kernel run: input picture, downscaled
 * output picture, and (optionally) either the MB statistics buffer or the
 * flatness-check surface at the shared MBVPROCSTATS binding slot.
 */
static void
gen9_avc_send_surface_scaling(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              void *param)
{
    struct scaling_param *surface_param = (struct scaling_param *)param;
    unsigned int surface_format;
    unsigned int res_size;

    /* Pick the surface format the kernel expects for the luma planes. */
    if (surface_param->scaling_out_use_32unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R32_UNORM;
    else if (surface_param->scaling_out_use_16unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R16_UNORM;
    else
        surface_format = I965_SURFACEFORMAT_R8_UNORM;

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->input_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            surface_param->output_surface,
                            0, 1, surface_format,
                            GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);

    /*add buffer mv_proc_stat, here need change*/
    if (surface_param->mbv_proc_stat_enabled) {
        /* 16 dwords of statistics per 16x16 MB; the size is passed to the
         * binding in dwords (res_size / 4). */
        res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);

        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    surface_param->pres_mbv_proc_stat_buffer,
                                    0,
                                    res_size / 4,
                                    0,
                                    GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
    } else if (surface_param->enable_mb_flatness_check) {
        /* Flatness-only mode reuses the same binding slot for the 2D
         * flatness-check surface. */
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       surface_param->pres_flatness_check_surface,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
    }

    return;
}
1504
1505 static VAStatus
1506 gen9_avc_kernel_scaling(VADriverContextP ctx,
1507                         struct encode_state *encode_state,
1508                         struct intel_encoder_context *encoder_context,
1509                         int hme_type)
1510 {
1511     struct i965_driver_data *i965 = i965_driver_data(ctx);
1512     struct i965_gpe_table *gpe = &i965->gpe_table;
1513     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1514     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1515     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1516     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1517     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1518
1519     struct i965_gpe_context *gpe_context;
1520     struct scaling_param surface_param;
1521     struct object_surface *obj_surface;
1522     struct gen9_surface_avc *avc_priv_surface;
1523     struct gpe_media_object_walker_parameter media_object_walker_param;
1524     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1525     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1526     int media_function = 0;
1527     int kernel_idx = 0;
1528
1529     obj_surface = encode_state->reconstructed_object;
1530     avc_priv_surface = obj_surface->private_data;
1531
1532     memset(&surface_param, 0, sizeof(struct scaling_param));
1533     switch (hme_type) {
1534     case INTEL_ENC_HME_4x : {
1535         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1536         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1537         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1538         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1539
1540         surface_param.input_surface = encode_state->input_yuv_object ;
1541         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1542         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1543
1544         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1545         surface_param.output_frame_width = generic_state->frame_width_4x ;
1546         surface_param.output_frame_height = generic_state->frame_height_4x ;
1547
1548         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1549         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1550         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1551
1552         surface_param.blk8x8_stat_enabled = 0 ;
1553         surface_param.use_4x_scaling  = 1 ;
1554         surface_param.use_16x_scaling = 0 ;
1555         surface_param.use_32x_scaling = 0 ;
1556         break;
1557     }
1558     case INTEL_ENC_HME_16x : {
1559         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1560         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1561         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1562         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1563
1564         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1565         surface_param.input_frame_width = generic_state->frame_width_4x ;
1566         surface_param.input_frame_height = generic_state->frame_height_4x ;
1567
1568         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1569         surface_param.output_frame_width = generic_state->frame_width_16x ;
1570         surface_param.output_frame_height = generic_state->frame_height_16x ;
1571
1572         surface_param.enable_mb_flatness_check = 0 ;
1573         surface_param.enable_mb_variance_output = 0 ;
1574         surface_param.enable_mb_pixel_average_output = 0 ;
1575
1576         surface_param.blk8x8_stat_enabled = 0 ;
1577         surface_param.use_4x_scaling  = 0 ;
1578         surface_param.use_16x_scaling = 1 ;
1579         surface_param.use_32x_scaling = 0 ;
1580
1581         break;
1582     }
1583     case INTEL_ENC_HME_32x : {
1584         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1585         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1586         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1587         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1588
1589         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1590         surface_param.input_frame_width = generic_state->frame_width_16x ;
1591         surface_param.input_frame_height = generic_state->frame_height_16x ;
1592
1593         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1594         surface_param.output_frame_width = generic_state->frame_width_32x ;
1595         surface_param.output_frame_height = generic_state->frame_height_32x ;
1596
1597         surface_param.enable_mb_flatness_check = 0 ;
1598         surface_param.enable_mb_variance_output = 0 ;
1599         surface_param.enable_mb_pixel_average_output = 0 ;
1600
1601         surface_param.blk8x8_stat_enabled = 0 ;
1602         surface_param.use_4x_scaling  = 0 ;
1603         surface_param.use_16x_scaling = 0 ;
1604         surface_param.use_32x_scaling = 1 ;
1605         break;
1606     }
1607     default :
1608         assert(0);
1609
1610     }
1611
1612     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1613
1614     gpe->context_init(ctx, gpe_context);
1615     gpe->reset_binding_table(ctx, gpe_context);
1616
1617     if (surface_param.use_32x_scaling) {
1618         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1619     } else {
1620         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1621     }
1622
1623     if (surface_param.use_32x_scaling) {
1624         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1625         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1626     } else {
1627         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1628         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1629     }
1630
1631     if (surface_param.use_4x_scaling) {
1632         if (avc_state->mb_status_supported) {
1633             surface_param.enable_mb_flatness_check = 0;
1634             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1635             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1636
1637         } else {
1638             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1639             surface_param.mbv_proc_stat_enabled = 0 ;
1640             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1641         }
1642     }
1643
1644     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1645
1646     /* setup the interface data */
1647     gpe->setup_interface_data(ctx, gpe_context);
1648
1649     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1650     if (surface_param.use_32x_scaling) {
1651         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1652         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1653     } else {
1654         /* the scaling is based on 8x8 blk level */
1655         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1656         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1657     }
1658     kernel_walker_param.no_dependency = 1;
1659
1660     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1661
1662     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1663                                             gpe_context,
1664                                             media_function,
1665                                             &media_object_walker_param);
1666
1667     return VA_STATUS_SUCCESS;
1668 }
1669
1670 /*
1671 frame/mb brc related function
1672 */
/*
 * Build a default MFX_AVC_IMG_STATE command in *pstate from the current
 * sequence/picture parameters.  The caller (gen9_avc_set_image_state*)
 * copies this template once per PAK pass and patches the per-pass
 * fields (macroblock_stat_enable, non_first_pass_flag, ...).
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; dword_length counts payload dwords minus two. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1/DW2: frame dimensions in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: picture coding controls, mirrored from the VA-API SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: multi-pass PAK controls.  The size-check flags let later
     * passes react to frame-size over/underflow. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    /* Trellis quantization (AQ) only applies with CABAC entropy coding. */
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB bit budget ceilings used with the max-bit flags above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-slice delta-QP clamps (unused here, all zero). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate bounds; max is set to the field's full
     * 14-bit range, i.e. effectively unbounded. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
1763
1764 void gen9_avc_set_image_state(VADriverContextP ctx,
1765                               struct encode_state *encode_state,
1766                               struct intel_encoder_context *encoder_context,
1767                               struct i965_gpe_resource *gpe_resource)
1768 {
1769     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1770     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1771     char *pdata;
1772     int i;
1773     unsigned int * data;
1774     struct gen9_mfx_avc_img_state cmd;
1775
1776     pdata = i965_map_gpe_resource(gpe_resource);
1777
1778     if (!pdata)
1779         return;
1780
1781     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1782     for (i = 0; i < generic_state->num_pak_passes; i++) {
1783
1784         if (i == 0) {
1785             cmd.dw4.macroblock_stat_enable = 0;
1786             cmd.dw5.non_first_pass_flag = 0;
1787         } else {
1788             cmd.dw4.macroblock_stat_enable = 1;
1789             cmd.dw5.non_first_pass_flag = 1;
1790             cmd.dw5.intra_mb_ipcm_flag = 1;
1791
1792         }
1793         cmd.dw5.mb_rate_ctrl_flag = 0;
1794         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1795         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1796         *data = MI_BATCH_BUFFER_END;
1797
1798         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1799     }
1800     i965_unmap_gpe_resource(gpe_resource);
1801     return;
1802 }
1803
1804 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1805                                       struct encode_state *encode_state,
1806                                       struct intel_encoder_context *encoder_context,
1807                                       struct i965_gpe_resource *gpe_resource)
1808 {
1809     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1810     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1811     char *pdata;
1812
1813     unsigned int * data;
1814     struct gen9_mfx_avc_img_state cmd;
1815
1816     pdata = i965_map_gpe_resource(gpe_resource);
1817
1818     if (!pdata)
1819         return;
1820
1821     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1822
1823     if (generic_state->curr_pak_pass == 0) {
1824         cmd.dw4.macroblock_stat_enable = 0;
1825         cmd.dw5.non_first_pass_flag = 0;
1826
1827     } else {
1828         cmd.dw4.macroblock_stat_enable = 1;
1829         cmd.dw5.non_first_pass_flag = 0;
1830         cmd.dw5.intra_mb_ipcm_flag = 1;
1831     }
1832
1833     cmd.dw5.mb_rate_ctrl_flag = 0;
1834     memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1835     data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1836     *data = MI_BATCH_BUFFER_END;
1837
1838     i965_unmap_gpe_resource(gpe_resource);
1839     return;
1840 }
1841
/*
 * Gen9.5 (KBL/GLK) trellis-quantization lambda table setup.
 *
 * Copies the per-slice-type lambda LUT into avc_state->lamda_value_lut
 * and then patches sentinel entries: each 32-bit entry packs the intra
 * lambda in the high 16 bits and the inter lambda in the low 16 bits;
 * the sentinels 0xfffa (intra) / 0xffef (inter) are replaced by
 * 0xf000 + a rounding value.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* size is already a byte count; the extra * sizeof(unsigned char)
     * below is a no-op multiplier of 1. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    /* Two entries per QP (col 0/1), AVC_QP_MAX QP values. */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;

            /* Intra: only the 0xfffa sentinel is rewritten. */
            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                /* The 0xffef sentinel selects the P/B-specific rounding
                 * value (from avc_state overrides or the preset tables). */
                if (inter == 0xffef) {
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): unlike the intra path, this rewrite is
                 * OUTSIDE the sentinel check, so EVERY inter value below
                 * GEN95_AVC_MAX_LAMBDA becomes 0xf000 + rounding_value,
                 * with rounding_value possibly stale from an earlier
                 * iteration (or 0).  Looks suspicious — confirm against
                 * the reference kernel behavior before changing. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
1919
/*
 * Fill the BRC constant-data buffer for the current frame type.
 *
 * The buffer is a fixed layout consumed by the BRC kernels, written
 * section by section (data advances by each section's byte size):
 *   QP adjustment / distortion thresholds, skip thresholds, ref-list
 *   QPs, MV+mode costs, ref costs, intra scaling factors, and on
 *   KBL/GLK the extra lambda and FTQ tables.
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optional override: app-supplied non-FTQ skip thresholds, one byte
     * per QP stored at odd offsets (2 bytes per entry). */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0, 32 bytes (gap), 32 bytes L1, 160 bytes rest;
     * the 0xff fills mark unused ref entries. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* L1 entries live at offset 64; restore data before writing L0. */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//?
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* NOTE(review): data_tmp is unsigned char *, so this stores only
         * the low byte of the cost despite the (unsigned int) cast, and
         * advances 16 bytes per QP while the FTQ patch below assumes a
         * 32-byte-per-QP stride — confirm the intended table layout
         * (compare gen9_avc_init_brc_const_data_old, where data is
         * unsigned int *). */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* App-supplied FTQ skip thresholds replicated across bytes 24-31
     * (except 26) of each 32-byte per-QP cost record. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5 (KBL/GLK) appends the trellis lambda and FTQ25 tables. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2076
/*
 * Legacy (gen75-table) variant of the BRC constant-data fill.
 *
 * NOTE(review): data is declared unsigned int *, so every `data += size`
 * advances size DWORDS (4x the bytes), while the memcpy/size values are
 * byte counts as in gen9_avc_init_brc_const_data().  That makes the
 * section offsets 4x larger than the byte-oriented variant — presumably
 * a long-standing quirk of this path; confirm the expected buffer
 * layout before touching it.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* NOTE(review): with data as unsigned int *, these writes patch
     * dwords (one per LUT entry) rather than the odd bytes patched by
     * the byte-oriented variant — verify against the kernel's layout. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Ref-list QP section is left zeroed; only the pointer is advanced. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* One dword per QP at offset 3, stride 16 dwords (64 bytes). */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* App-supplied FTQ skip thresholds, stride 32 dwords per QP here. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2171 static void
2172 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2173                                   struct encode_state *encode_state,
2174                                   struct i965_gpe_context *gpe_context,
2175                                   struct intel_encoder_context *encoder_context,
2176                                   void * param)
2177 {
2178     gen9_avc_brc_init_reset_curbe_data *cmd;
2179     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2180     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2181     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2182     double input_bits_per_frame = 0;
2183     double bps_ratio = 0;
2184     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2185     struct avc_param common_param;
2186
2187     cmd = i965_gpe_context_map_curbe(gpe_context);
2188
2189     if (!cmd)
2190         return;
2191
2192     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2193
2194     memset(&common_param, 0, sizeof(common_param));
2195     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2196     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2197     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2198     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2199     common_param.frames_per_100s = generic_state->frames_per_100s;
2200     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2201     common_param.target_bit_rate = generic_state->target_bit_rate;
2202
2203     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2204     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2205     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2206     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2207     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2208     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2209     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2210     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2211     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2212     cmd->dw12.no_slices = avc_state->slice_num;
2213
2214     //VUI
2215     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2216         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2217         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2218             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2219
2220         }
2221
2222     }
2223     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2224     cmd->dw7.frame_rate_d = 100;
2225     cmd->dw8.brc_flag = 0;
2226     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2227
2228
2229     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2230         //CBR
2231         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2232         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2233
2234     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2235         //VBR
2236         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2237             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2238         }
2239         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2240
2241     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2242         //AVBR
2243         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2244         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2245
2246     }
2247     //igonre icq/vcm/qvbr
2248
2249     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2250     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2251
2252     //frame bits
2253     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2254
2255     if (cmd->dw2.buf_size_in_bits == 0) {
2256         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2257     }
2258
2259     if (cmd->dw1.init_buf_full_in_bits == 0) {
2260         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2261     }
2262     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2263         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2264     }
2265     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2266         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2267     }
2268
2269     //AVBR
2270     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2271         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2272         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2273
2274     }
2275
2276     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2277     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2278
2279
2280     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2281     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2282     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2283     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2284     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2285     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2286     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2287     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2288     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2289     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2290     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2291     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2292     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2293     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2294     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2295     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2296     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2297     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2298     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2299     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2300     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2301     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2302     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2303     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2304
2305     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2306
2307     i965_gpe_context_unmap_curbe(gpe_context);
2308
2309     return;
2310 }
2311
2312 static void
2313 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2314                                      struct encode_state *encode_state,
2315                                      struct i965_gpe_context *gpe_context,
2316                                      struct intel_encoder_context *encoder_context,
2317                                      void * param_mbenc)
2318 {
2319     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2320     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2321
2322     gen9_add_buffer_gpe_surface(ctx,
2323                                 gpe_context,
2324                                 &avc_ctx->res_brc_history_buffer,
2325                                 0,
2326                                 avc_ctx->res_brc_history_buffer.size,
2327                                 0,
2328                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2329
2330     gen9_add_buffer_2d_gpe_surface(ctx,
2331                                    gpe_context,
2332                                    &avc_ctx->res_brc_dist_data_surface,
2333                                    1,
2334                                    I965_SURFACEFORMAT_R8_UNORM,
2335                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2336
2337     return;
2338 }
2339
2340 static VAStatus
2341 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2342                                struct encode_state *encode_state,
2343                                struct intel_encoder_context *encoder_context)
2344 {
2345     struct i965_driver_data *i965 = i965_driver_data(ctx);
2346     struct i965_gpe_table *gpe = &i965->gpe_table;
2347     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2348     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2349     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2350     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2351
2352     struct i965_gpe_context *gpe_context;
2353     struct gpe_media_object_parameter media_object_param;
2354     struct gpe_media_object_inline_data media_object_inline_data;
2355     int media_function = 0;
2356     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2357
2358     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2359
2360     if (generic_state->brc_inited)
2361         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2362
2363     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2364
2365     gpe->context_init(ctx, gpe_context);
2366     gpe->reset_binding_table(ctx, gpe_context);
2367
2368     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2369
2370     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2371
2372     gpe->setup_interface_data(ctx, gpe_context);
2373
2374     memset(&media_object_param, 0, sizeof(media_object_param));
2375     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2376     media_object_param.pinline_data = &media_object_inline_data;
2377     media_object_param.inline_size = sizeof(media_object_inline_data);
2378
2379     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2380                                      gpe_context,
2381                                      media_function,
2382                                      &media_object_param);
2383
2384     return VA_STATUS_SUCCESS;
2385 }
2386
/* Fill the curbe for the frame-level BRC update kernel.
 *
 * Starts from the static init template, then patches in the per-frame
 * target size, skipped-frame accounting, frame type, min/max QP limits
 * and (for AVBR) the growth-adjustment thresholds.  Also advances
 * generic_state->brc_init_current_target_buf_full_in_bits by one frame's
 * worth of bits as a side effect, so call order relative to other BRC
 * stages matters.
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    /* the per-surface AVC private data carries the is-reference flag and
     * frame index used below; bail out if it is missing */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* start from the static defaults, then override per-frame fields */
    memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));

    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* account for frames the app asked to skip: report them to the kernel
     * and credit their bit budget to the target buffer fullness */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* "herder" is a typo for header in the state field name: inserted
     * header bytes, converted to bits (<< 3) */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* per-slice-type QP clamping; 0/0 means "no clamping" to the kernel */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* advance the running target fullness by one frame's bit budget */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR only: growth-adjustment frame thresholds scaled by convergence,
     * and rate-ratio thresholds scaled by accuracy */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    /* profile/level max-frame-size bound computed from the common params */
    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2496
/* Bind all surfaces used by the frame-level BRC update kernel.
 *
 * Binding-table indices differ between Gen9 (SKL/BXT) and Gen9.5
 * (KBL/GLK); the is_g95 flag selects the right index set.  The kernel
 * also patches the mbenc curbe: either through a dedicated buffer
 * (res_mbenc_brc_buffer, when mbenc_brc_buffer_size > 0) or by reading
 * and writing the mbenc GPE context's curbe region directly.
 */
static void
gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
                                       struct encode_state *encode_state,
                                       struct i965_gpe_context *gpe_context,
                                       struct intel_encoder_context *encoder_context,
                                       void * param_brc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct brc_param * param = (struct brc_param *)param_brc ;
    struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned char is_g95 = 0;

    /* select the binding-table index set for this platform generation */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    /* brc history buffer*/
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_brc_history_buffer,
                                0,
                                avc_ctx->res_brc_history_buffer.size,
                                0,
                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));

    /* previous pak buffer*/
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
                                0,
                                avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
                                0,
                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));

    /* image state command buffer read only*/
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_brc_image_state_read_buffer,
                                0,
                                avc_ctx->res_brc_image_state_read_buffer.size,
                                0,
                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));

    /* image state command buffer write only*/
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_brc_image_state_write_buffer,
                                0,
                                avc_ctx->res_brc_image_state_write_buffer.size,
                                0,
                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));

    /* mbenc curbe update path: dedicated buffer (Gen9.5 style) vs direct
     * read/write of the mbenc context's curbe in dynamic state */
    if (avc_state->mbenc_brc_buffer_size > 0) {
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    &(avc_ctx->res_mbenc_brc_buffer),
                                    0,
                                    avc_ctx->res_mbenc_brc_buffer.size,
                                    0,
                                    GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
    } else {
        /*  Mbenc curbe input buffer */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_context_mbenc->dynamic_state.bo,
                                        0,
                                        ALIGN(gpe_context_mbenc->curbe.length, 64),
                                        gpe_context_mbenc->curbe.offset,
                                        GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
        /* Mbenc curbe output buffer */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_context_mbenc->dynamic_state.bo,
                                        0,
                                        ALIGN(gpe_context_mbenc->curbe.length, 64),
                                        gpe_context_mbenc->curbe.offset,
                                        GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
    }

    /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
    gen9_add_buffer_2d_gpe_surface(ctx,
                                   gpe_context,
                                   &avc_ctx->res_brc_dist_data_surface,
                                   1,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));

    /* BRC const data 2D surface buffer */
    gen9_add_buffer_2d_gpe_surface(ctx,
                                   gpe_context,
                                   &avc_ctx->res_brc_const_data_buffer,
                                   1,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));

    /* MB statistical data surface*/
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_mb_status_buffer,
                                0,
                                avc_ctx->res_mb_status_buffer.size,
                                0,
                                (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));

    return;
}
2609
/* Run the frame-level BRC update kernel.
 *
 * First prepares the mbenc GPE context and fills its curbe (the BRC
 * kernel will patch that curbe with the chosen QP), then initializes the
 * BRC frame-update context, loads the BRC constant data and the image
 * state, binds surfaces and dispatches a single media object.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty-ROI path is intentionally hard-disabled by the trailing (0) */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* pick the mbenc kernel variant by quality/performance mode ... */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    /* ... then offset from the I variant to the P (+1) or B (+2) variant */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    /* note: gpe_context is repointed from the mbenc context to the BRC
     * frame-update context below; the mbenc one is kept in curbe_brc_param */
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct*/
    gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* single media object with zeroed inline data */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
2730
2731 static void
2732 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2733                                  struct encode_state *encode_state,
2734                                  struct i965_gpe_context *gpe_context,
2735                                  struct intel_encoder_context *encoder_context,
2736                                  void * param)
2737 {
2738     gen9_avc_mb_brc_curbe_data *cmd;
2739     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2740     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2741
2742     cmd = i965_gpe_context_map_curbe(gpe_context);
2743
2744     if (!cmd)
2745         return;
2746
2747     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2748
2749     cmd->dw0.cur_frame_type = generic_state->frame_type;
2750     if (generic_state->brc_roi_enable) {
2751         cmd->dw0.enable_roi = 1;
2752     } else {
2753         cmd->dw0.enable_roi = 0;
2754     }
2755
2756     i965_gpe_context_unmap_curbe(gpe_context);
2757
2758     return;
2759 }
2760
2761 static void
2762 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2763                                     struct encode_state *encode_state,
2764                                     struct i965_gpe_context *gpe_context,
2765                                     struct intel_encoder_context *encoder_context,
2766                                     void * param_mbenc)
2767 {
2768     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2769     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2770     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2771
2772     /* brc history buffer*/
2773     gen9_add_buffer_gpe_surface(ctx,
2774                                 gpe_context,
2775                                 &avc_ctx->res_brc_history_buffer,
2776                                 0,
2777                                 avc_ctx->res_brc_history_buffer.size,
2778                                 0,
2779                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2780
2781     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2782     if (generic_state->mb_brc_enabled) {
2783         gen9_add_buffer_2d_gpe_surface(ctx,
2784                                        gpe_context,
2785                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2786                                        1,
2787                                        I965_SURFACEFORMAT_R8_UNORM,
2788                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2789
2790     }
2791
2792     /* BRC roi feature*/
2793     if (generic_state->brc_roi_enable) {
2794         gen9_add_buffer_gpe_surface(ctx,
2795                                     gpe_context,
2796                                     &avc_ctx->res_mbbrc_roi_surface,
2797                                     0,
2798                                     avc_ctx->res_mbbrc_roi_surface.size,
2799                                     0,
2800                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2801
2802     }
2803
2804     /* MB statistical data surface*/
2805     gen9_add_buffer_gpe_surface(ctx,
2806                                 gpe_context,
2807                                 &avc_ctx->res_mb_status_buffer,
2808                                 0,
2809                                 avc_ctx->res_mb_status_buffer.size,
2810                                 0,
2811                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
2812
2813     return;
2814 }
2815
2816 static VAStatus
2817 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2818                               struct encode_state *encode_state,
2819                               struct intel_encoder_context *encoder_context)
2820
2821 {
2822     struct i965_driver_data *i965 = i965_driver_data(ctx);
2823     struct i965_gpe_table *gpe = &i965->gpe_table;
2824     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2825     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2826     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2827     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2828
2829     struct i965_gpe_context *gpe_context;
2830     struct gpe_media_object_walker_parameter media_object_walker_param;
2831     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2832     int media_function = 0;
2833     int kernel_idx = 0;
2834
2835     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2836     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2837     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2838
2839     gpe->context_init(ctx, gpe_context);
2840     gpe->reset_binding_table(ctx, gpe_context);
2841
2842     /* set curbe brc mb update*/
2843     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2844
2845
2846     /* set surface brc mb update*/
2847     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2848
2849
2850     gpe->setup_interface_data(ctx, gpe_context);
2851
2852     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2853     /* the scaling is based on 8x8 blk level */
2854     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2855     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
2856     kernel_walker_param.no_dependency = 1;
2857
2858     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2859
2860     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2861                                             gpe_context,
2862                                             media_function,
2863                                             &media_object_walker_param);
2864
2865     return VA_STATUS_SUCCESS;
2866 }
2867
2868 /*
2869 mbenc kernel related function,it include intra dist kernel
2870 */
2871 static int
2872 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2873 {
2874     int biweight = 32;      // default value
2875
2876     /* based on kernel HLD*/
2877     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
2878         biweight = 32;
2879     } else {
2880         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2881
2882         if (biweight != 16 && biweight != 21 &&
2883             biweight != 32 && biweight != 43 && biweight != 48) {
2884             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
2885         }
2886     }
2887
2888     return biweight;
2889 }
2890
2891 static void
2892 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2893                                struct encode_state *encode_state,
2894                                struct intel_encoder_context *encoder_context)
2895 {
2896     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2897     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2898     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2899     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
2900
2901     int max_num_references;
2902     VAPictureH264 *curr_pic;
2903     VAPictureH264 *ref_pic_l0;
2904     VAPictureH264 *ref_pic_l1;
2905     int i = 0;
2906     int tb = 0;
2907     int td = 0;
2908     int tx = 0;
2909     int tmp = 0;
2910     int poc0 = 0;
2911     int poc1 = 0;
2912
2913     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2914
2915     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2916     curr_pic = &pic_param->CurrPic;
2917     for (i = 0; i < max_num_references; i++) {
2918         ref_pic_l0 = &(slice_param->RefPicList0[i]);
2919
2920         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2921             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2922             break;
2923         ref_pic_l1 = &(slice_param->RefPicList1[0]);
2924         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2925             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2926             break;
2927
2928         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2929         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2930         CLIP(poc0, -128, 127);
2931         CLIP(poc1, -128, 127);
2932         tb = poc0;
2933         td = poc1;
2934
2935         if (td == 0) {
2936             td = 1;
2937         }
2938         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2939         tx = (16384 + tmp) / td ;
2940         tmp = (tb * tx + 32) >> 6;
2941         CLIP(tmp, -1024, 1023);
2942         avc_state->dist_scale_factor_list0[i] = tmp;
2943     }
2944     return;
2945 }
2946
2947 static unsigned int
2948 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2949                               VAEncSliceParameterBufferH264 *slice_param,
2950                               int list,
2951                               int ref_frame_idx)
2952 {
2953     struct i965_driver_data *i965 = i965_driver_data(ctx);
2954     struct object_surface *obj_surface;
2955     struct gen9_surface_avc *avc_priv_surface;
2956     VASurfaceID surface_id;
2957
2958     assert(slice_param);
2959     assert(list < 2);
2960
2961     if (list == 0) {
2962         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2963             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2964         else
2965             return 0;
2966     } else {
2967         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2968             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2969         else
2970             return 0;
2971     }
2972     obj_surface = SURFACE(surface_id);
2973     if (obj_surface && obj_surface->private_data) {
2974         avc_priv_surface = obj_surface->private_data;
2975         return avc_priv_surface->qp_value;
2976     } else {
2977         return 0;
2978     }
2979 }
2980
/* Populate the MB-BRC constant-data surface for the current frame type.
 *
 * The surface is laid out as one 16-dword row per QP value (16 * 52
 * dwords total).  The base content is copied from the per-slice-type
 * table gen9_avc_mb_brc_const_data, then individual dwords in each row
 * are patched according to the enabled encoder features:
 *   dword 0      - old-style intra mode cost (I slices only)
 *   dword 3      - MV cost with P-skip bias adjustment (P slices)
 *   dwords 6,7   - FTQ skip threshold LUT (replicated into byte lanes)
 *   dword 9      - non-FTQ skip value
 *   dword 10     - intra scaling factor (adaptive or fixed)
 *   dwords 11,12 - trellis lambda values
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;     /* dwords: 16 per row, one row per QP */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B base table for the current frame type. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    /* Remember the start so the QP-independent pass below can rewind. */
    data_tmp = data;

    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 0: legacy intra mode cost, when enabled */
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;   /* advance to the next QP row */
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 3: MV cost adjusted for P-skip bias (P only) */
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: non-FTQ skip value, either from the app-supplied
             * LUT or from the built-in per-slice-type tables */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra cost scaling factor */
            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;   /* advance to the next QP row */

        }
        break;
    default:
        assert(0);
    }

    /* Second pass (all frame types): FTQ thresholds and trellis lambdas. */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        /* dwords 6/7: FTQ skip threshold replicated into byte lanes */
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        /* dwords 11/12: trellis quantization lambda pair */
        if (avc_state->kernel_trellis_enable) {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3070
3071 static void
3072 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3073                          struct encode_state *encode_state,
3074                          struct i965_gpe_context *gpe_context,
3075                          struct intel_encoder_context *encoder_context,
3076                          void * param)
3077 {
3078     struct i965_driver_data *i965 = i965_driver_data(ctx);
3079     union {
3080         gen9_avc_mbenc_curbe_data *g9;
3081         gen95_avc_mbenc_curbe_data *g95;
3082     } cmd;
3083     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3084     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3085     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3086
3087     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3088     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3089     VASurfaceID surface_id;
3090     struct object_surface *obj_surface;
3091
3092     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3093     unsigned char qp = 0;
3094     unsigned char me_method = 0;
3095     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3096     unsigned int table_idx = 0;
3097     unsigned char is_g9 = 0;
3098     unsigned char is_g95 = 0;
3099     unsigned int curbe_size = 0;
3100
3101     unsigned int preset = generic_state->preset;
3102     if (IS_SKL(i965->intel.device_info) ||
3103         IS_BXT(i965->intel.device_info)) {
3104         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3105         if (!cmd.g9)
3106             return;
3107         is_g9 = 1;
3108         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3109         memset(cmd.g9, 0, curbe_size);
3110
3111         if (mbenc_i_frame_dist_in_use) {
3112             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3113
3114         } else {
3115             switch (generic_state->frame_type) {
3116             case SLICE_TYPE_I:
3117                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3118                 break;
3119             case SLICE_TYPE_P:
3120                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3121                 break;
3122             case SLICE_TYPE_B:
3123                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3124                 break;
3125             default:
3126                 assert(0);
3127             }
3128
3129         }
3130     } else if (IS_KBL(i965->intel.device_info) ||
3131                IS_GLK(i965->intel.device_info)) {
3132         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3133         if (!cmd.g95)
3134             return;
3135         is_g95 = 1;
3136         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3137         memset(cmd.g9, 0, curbe_size);
3138
3139         if (mbenc_i_frame_dist_in_use) {
3140             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3141
3142         } else {
3143             switch (generic_state->frame_type) {
3144             case SLICE_TYPE_I:
3145                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3146                 break;
3147             case SLICE_TYPE_P:
3148                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3149                 break;
3150             case SLICE_TYPE_B:
3151                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3152                 break;
3153             default:
3154                 assert(0);
3155             }
3156
3157         }
3158     } else {
3159         /* Never get here, just silence a gcc warning */
3160         assert(0);
3161
3162         return;
3163     }
3164
3165     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3166     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3167
3168     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3169     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3170     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3171     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3172
3173     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3174     cmd.g9->dw38.max_len_sp = 0;
3175
3176     if (is_g95)
3177         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3178
3179     cmd.g9->dw3.src_access = 0;
3180     cmd.g9->dw3.ref_access = 0;
3181
3182     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3183         //disable ftq_override by now.
3184         if (avc_state->ftq_override) {
3185             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3186
3187         } else {
3188             // both gen9 and gen95 come here by now
3189             if (generic_state->frame_type == SLICE_TYPE_P) {
3190                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3191
3192             } else {
3193                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3194             }
3195         }
3196     } else {
3197         cmd.g9->dw3.ftq_enable = 0;
3198     }
3199
3200     if (avc_state->disable_sub_mb_partion)
3201         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3202
3203     if (mbenc_i_frame_dist_in_use) {
3204         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3205         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3206         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3207         cmd.g9->dw6.batch_buffer_end = 0;
3208         cmd.g9->dw31.intra_compute_type = 1;
3209
3210     } else {
3211         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3212         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3213         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3214
3215         {
3216             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3217             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3218                 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3219             } else if (avc_state->skip_bias_adjustment_enable) {
3220                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3221                 // No need to check for P picture as the flag is only enabled for P picture */
3222                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3223
3224             }
3225         }
3226
3227         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3228         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3229     }
3230     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3231     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3232     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3233     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3234     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3235     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3236     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3237     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3238
3239
3240     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3241     cmd.g9->dw7.src_field_polarity = 0;//field related
3242
3243     /*ftq_skip_threshold_lut set,dw14 /15*/
3244
3245     /*r5 disable NonFTQSkipThresholdLUT*/
3246     if (generic_state->frame_type == SLICE_TYPE_P) {
3247         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3248
3249     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3250         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3251
3252     }
3253
3254     cmd.g9->dw13.qp_prime_y = qp;
3255     cmd.g9->dw13.qp_prime_cb = qp;
3256     cmd.g9->dw13.qp_prime_cr = qp;
3257     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3258
3259     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3260         switch (gen9_avc_multi_pred[preset]) {
3261         case 0:
3262             cmd.g9->dw32.mult_pred_l0_disable = 128;
3263             cmd.g9->dw32.mult_pred_l1_disable = 128;
3264             break;
3265         case 1:
3266             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3267             cmd.g9->dw32.mult_pred_l1_disable = 128;
3268             break;
3269         case 2:
3270             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3271             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3272             break;
3273         case 3:
3274             cmd.g9->dw32.mult_pred_l0_disable = 1;
3275             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3276             break;
3277
3278         }
3279
3280     } else {
3281         cmd.g9->dw32.mult_pred_l0_disable = 128;
3282         cmd.g9->dw32.mult_pred_l1_disable = 128;
3283     }
3284
3285     /*field setting for dw33 34, ignored*/
3286
3287     if (avc_state->adaptive_transform_decision_enable) {
3288         if (generic_state->frame_type != SLICE_TYPE_I) {
3289             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3290             if (is_g95) {
3291                 cmd.g95->dw60.mb_texture_threshold = 1024;
3292                 cmd.g95->dw60.tx_decision_threshold = 128;
3293             }
3294
3295         }
3296
3297         if (is_g9) {
3298             cmd.g9->dw58.mb_texture_threshold = 1024;
3299             cmd.g9->dw58.tx_decision_threshold = 128;
3300         }
3301     }
3302
3303
3304     if (generic_state->frame_type == SLICE_TYPE_B) {
3305         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3306         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3307         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3308     }
3309
3310     cmd.g9->dw34.b_original_bff = 0; //frame only
3311     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3312     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3313     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3314     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3315     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3316     if (is_g95) {
3317         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3318         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3319     }
3320
3321     if (is_g9) {
3322         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3323
3324         if (cmd.g9->dw34.force_non_skip_check) {
3325             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3326         }
3327     }
3328
3329
3330     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3331     cmd.g9->dw38.ref_threshold = 400;
3332     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3333
3334     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3335        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3336        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3337     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3338
3339     if (mbenc_i_frame_dist_in_use) {
3340         cmd.g9->dw13.qp_prime_y = 0;
3341         cmd.g9->dw13.qp_prime_cb = 0;
3342         cmd.g9->dw13.qp_prime_cr = 0;
3343         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3344         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3345         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3346
3347     }
3348     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3349         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3350         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3351         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3352         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3353         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3354         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3355         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3356         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3357         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3358         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3359     }
3360
3361     table_idx = slice_type_kernel[generic_state->frame_type];
3362     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3363
3364     if (generic_state->frame_type == SLICE_TYPE_I) {
3365         cmd.g9->dw0.skip_mode_enable = 0;
3366         cmd.g9->dw37.skip_mode_enable = 0;
3367         cmd.g9->dw36.hme_combine_overlap = 0;
3368         cmd.g9->dw47.intra_cost_sf = 16;
3369         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3370         if (is_g9)
3371             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3372
3373     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3374         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3375         cmd.g9->dw3.bme_disable_fbr = 1;
3376         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3377         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3378         cmd.g9->dw7.non_skip_zmv_added = 1;
3379         cmd.g9->dw7.non_skip_mode_added = 1;
3380         cmd.g9->dw7.skip_center_mask = 1;
3381         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3382         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3383         cmd.g9->dw36.hme_combine_overlap = 1;
3384         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3385         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3386         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3387         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3388         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3389         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3390             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3391
3392     } else {
3393         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3394         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3395         cmd.g9->dw3.search_ctrl = 7;
3396         cmd.g9->dw3.skip_type = 1;
3397         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3398         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3399         cmd.g9->dw7.skip_center_mask = 0xff;
3400         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3401         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3402         cmd.g9->dw36.hme_combine_overlap = 1;
3403         surface_id = slice_param->RefPicList1[0].picture_id;
3404         obj_surface = SURFACE(surface_id);
3405         if (!obj_surface) {
3406             WARN_ONCE("Invalid backward reference frame\n");
3407             return;
3408         }
3409         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3410
3411         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3412         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3413         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3414         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3415         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3416         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3417         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3418         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3419         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3420         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3421         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3422         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3423
3424         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3425         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3426             cmd.g9->dw7.non_skip_zmv_added = 1;
3427             cmd.g9->dw7.non_skip_mode_added = 1;
3428         }
3429
3430         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3431         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3432             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3433
3434     }
3435
3436     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3437
3438     if (avc_state->rolling_intra_refresh_enable) {
3439         /*by now disable it*/
3440         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3441         cmd.g9->dw32.mult_pred_l0_disable = 128;
3442         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3443          across one P frame to another P frame, as needed by the RollingI algo */
3444         if (is_g9) {
3445             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3446             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3447             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3448         }
3449
3450         if (is_g95) {
3451             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3452                 cmd.g95->dw4.enable_intra_refresh = 0;
3453                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3454                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3455                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3456             } else {
3457                 cmd.g95->dw4.enable_intra_refresh = 1;
3458                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3459                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3460                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3461                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3462                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3463             }
3464         }
3465
3466     } else {
3467         cmd.g9->dw34.widi_intra_refresh_en = 0;
3468     }
3469
3470     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3471     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3472
3473     /*roi set disable by now. 49-56*/
3474     if (curbe_param->roi_enabled) {
3475         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3476         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3477         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3478         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3479
3480         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3481         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3482         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3483         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3484
3485         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3486         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3487         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3488         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3489
3490         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3491         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3492         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3493         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3494
3495         if (!generic_state->brc_enabled) {
3496             char tmp = 0;
3497             tmp = generic_state->roi[0].value;
3498             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3499             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3500             tmp = generic_state->roi[1].value;
3501             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3502             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3503             tmp = generic_state->roi[2].value;
3504             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3505             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3506             tmp = generic_state->roi[3].value;
3507             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3508             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3509         } else {
3510             cmd.g9->dw34.roi_enable_flag = 0;
3511         }
3512     }
3513
3514     if (is_g95) {
3515         if (avc_state->tq_enable) {
3516             if (generic_state->frame_type == SLICE_TYPE_I) {
3517                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3518                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3519
3520             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3521                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3522                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3523
3524             } else {
3525                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3526                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3527             }
3528
3529             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3530                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3531
3532             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3533                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3534
3535             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3536                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3537
3538             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3539                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3540         }
3541     }
3542
3543     if (is_g95) {
3544         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3545         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3546         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3547         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3548         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3549         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3550         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3551         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3552         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3553         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3554         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3555         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3556         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3557         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3558         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3559         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3560         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3561         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3562         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3563         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3564         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3565         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3566     }
3567
3568     if (is_g9) {
3569         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3570         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3571         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3572         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3573         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3574         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3575         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3576         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3577         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3578         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3579         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3580         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3581         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3582         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3583         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3584         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3585         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3586         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3587         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3588         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3589         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3590         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3591     }
3592
3593     i965_gpe_context_unmap_curbe(gpe_context);
3594
3595     return;
3596 }
3597
/* Bind all input/output surfaces needed by the MBEnc (or I-frame distortion)
 * kernel into the GPE context's binding table.
 *
 * The surface set depends on the flags carried in param_mbenc (struct
 * mbenc_param): I-frame-distortion mode swaps the full-resolution input for
 * the 4x-downscaled reconstructed surface, and optional features (HME, MB QP,
 * MAD, SFD, slice map, ...) each add their own surfaces.  Binding-table slot
 * indices differ slightly between Gen9 (SKL/BXT) and Gen9.5 (KBL/GLK), hence
 * the is_g95 selection below.
 *
 * Returns early (binding left partially populated) if a required surface or
 * its private data is missing; callers do not receive an error code.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    /* total macroblock count of the frame; used to size per-MB buffers */
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Gen9 (SKL/BXT) vs Gen9.5 (KBL/GLK): selects which binding-table
     * indices are used for the skip-map and SFD cost-table surfaces below. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /* PAK object command buffer output (MB code), 16 DWs per MB */
    size = frame_mb_size * 16 * 4;
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /* MV data buffer output, 32 DWs per MB */
    size = frame_mb_size * 32 * 4;
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /* Current input YUV: the 4x-scaled reconstructed surface in
     * I-frame-distortion mode, otherwise the full-resolution input. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    /* Y plane */
    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    /* UV plane */
    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /* 4x ME motion-vector data input */
        gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
        /* 4x ME distortion input */
        gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
    }

    /* MB BRC constant data buffer (16 DWs per QP value) */
    if (param->mb_const_data_buffer_in_use) {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /* Per-MB QP data buffer: application-supplied map when mb_qp_data_enable,
     * otherwise the MB-BRC generated QP surface. */
    if (param->mb_qp_buffer_in_use) {
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /* Current picture as VME prediction surface (L0 slot 0) */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /* L0 reference YUV surfaces, interleaved at odd offsets after current */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /* Current picture again as VME prediction surface (L1 slot 0) */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            avc_priv_surface = obj_surface->private_data;
            /* forward reference MB code (PAK object) buffer, 16 DWs per MB */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /* forward reference MV data buffer, 32 DWs per MB */
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer for I frame*/
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* RefPicSelect surface of the current picture, written when it will be
     * used as a reference later */
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }

    /* MB statistics: either the VPP-produced status buffer, or the flatness
     * check surface — both share the MB_STATS binding slot. */
    if (param->mb_vproc_stats_enable) {
        /* MB status buffer input, 16 DWs per MB */
        size = frame_mb_size * 16 * 4;
        gpe_resource = &(avc_ctx->res_mb_status_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MB_STATS_INDEX);

    } else if (avc_state->flatness_check_enable) {

        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if (param->mad_enable) {
        /* MAD accumulator buffer (single DW), zeroed before the kernel runs */
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /* BRC-updated MBEnc curbe data buffer; ignored by gen9, used on gen95 */
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /* arbitrary number of MBs per slice: supply a slice map */
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /* slice map surface input */
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* Skip-map and SFD cost-table surfaces: only for the real MBEnc pass,
     * not the I-frame distortion pass */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }

        if (avc_state->sfd_enable && generic_state->hme_enabled) {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);

            } else if (generic_state->frame_type == SLICE_TYPE_B) {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
            }

            if (generic_state->frame_type != SLICE_TYPE_I) {
                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                               gpe_resource,
                                               1,
                                               I965_SURFACEFORMAT_R8_UNORM,
                                               (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
            }
        }
    }

    return;
}
3928
/* Run the MBEnc kernel (or the BRC I-frame distortion kernel) for the
 * current frame.
 *
 * Selects the kernel by mode: when i_frame_dist_in_use is set, the BRC
 * I-frame-distortion kernel runs over the 4x-downscaled frame; otherwise a
 * normal/performance/quality MBEnc kernel is chosen per kernel_mode and
 * offset by frame type (I/P/B).  Sets up the curbe (unless the BRC update
 * pass already did), binds surfaces, and dispatches a media-object walker
 * whose dependency pattern follows the slice type.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty ROI deliberately disabled for now via the trailing "&& (0)" */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* decide which optional surfaces the kernel will need */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion pass: runs on the 4x-downscaled frame,
         * MAD and BRC are forced off for this pass */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* normal MBEnc: pick the kernel family by quality/performance mode */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* kernels are laid out as I, P, B consecutively within each family */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* skip context init / curbe setup when the BRC update kernel has
     * already programmed the MBEnc curbe for this frame */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /* set curbe here */
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB BRC const data buffer set up */
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /* clear the MAD accumulator buffer */
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /* bind all input/output surfaces */
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /* walker setting: one thread per MB over the (possibly downscaled) frame */
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        /* distortion pass has no inter-MB dependency */
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker dependency pattern per slice type; spatial-direct B frames
         * use the 26-degree pattern, otherwise 45-degree */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
4094
4095 /*
4096 me kernle related function
4097 */
4098 static void
4099 gen9_avc_set_curbe_me(VADriverContextP ctx,
4100                       struct encode_state *encode_state,
4101                       struct i965_gpe_context *gpe_context,
4102                       struct intel_encoder_context *encoder_context,
4103                       void * param)
4104 {
4105     gen9_avc_me_curbe_data *curbe_cmd;
4106     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4107     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4108     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4109
4110     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4111
4112     struct me_param * curbe_param = (struct me_param *)param ;
4113     unsigned char  use_mv_from_prev_step = 0;
4114     unsigned char write_distortions = 0;
4115     unsigned char qp_prime_y = 0;
4116     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4117     unsigned char seach_table_idx = 0;
4118     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4119     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4120     unsigned int scale_factor = 0;
4121
4122     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4123     switch (curbe_param->hme_type) {
4124     case INTEL_ENC_HME_4x : {
4125         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4126         write_distortions = 1;
4127         mv_shift_factor = 2;
4128         scale_factor = 4;
4129         prev_mv_read_pos_factor = 0;
4130         break;
4131     }
4132     case INTEL_ENC_HME_16x : {
4133         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4134         write_distortions = 0;
4135         mv_shift_factor = 2;
4136         scale_factor = 16;
4137         prev_mv_read_pos_factor = 1;
4138         break;
4139     }
4140     case INTEL_ENC_HME_32x : {
4141         use_mv_from_prev_step = 0;
4142         write_distortions = 0;
4143         mv_shift_factor = 1;
4144         scale_factor = 32;
4145         prev_mv_read_pos_factor = 0;
4146         break;
4147     }
4148     default:
4149         assert(0);
4150
4151     }
4152     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4153
4154     if (!curbe_cmd)
4155         return;
4156
4157     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4158     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4159
4160     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4161
4162     curbe_cmd->dw3.sub_pel_mode = 3;
4163     if (avc_state->field_scaling_output_interleaved) {
4164         /*frame set to zero,field specified*/
4165         curbe_cmd->dw3.src_access = 0;
4166         curbe_cmd->dw3.ref_access = 0;
4167         curbe_cmd->dw7.src_field_polarity = 0;
4168     }
4169     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4170     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4171     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4172
4173     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4174     curbe_cmd->dw6.write_distortions = write_distortions;
4175     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4176     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4177
4178     if (generic_state->frame_type == SLICE_TYPE_B) {
4179         curbe_cmd->dw1.bi_weight = 32;
4180         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4181         me_method = gen9_avc_b_me_method[generic_state->preset];
4182         seach_table_idx = 1;
4183     }
4184
4185     if (generic_state->frame_type == SLICE_TYPE_P ||
4186         generic_state->frame_type == SLICE_TYPE_B)
4187         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4188
4189     curbe_cmd->dw13.ref_streamin_cost = 5;
4190     curbe_cmd->dw13.roi_enable = 0;
4191
4192     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4193     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4194
4195     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
4196
4197     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4198     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4199     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4200     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4201     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4202     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4203     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4204
4205     i965_gpe_context_unmap_curbe(gpe_context);
4206     return;
4207 }
4208
/*
 * Bind all input/output surfaces for one HME level.
 *
 * Every downscaled (4x/16x/32x) picture lives in the private data
 * (struct gen9_surface_avc) of its object_surface; the current frame's
 * scaled surfaces are stored in encode_state->reconstructed_object.
 * Forward (L0) reference i is bound at FWD_REF_INDEX + 2*i + 1 and
 * backward (L1) reference i at BWD_REF_INDEX + 2*i + 1, interleaving
 * current/reference entries in the binding table.
 */
static void
gen9_avc_send_surface_me(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx); /* needed by the SURFACE() lookup macro */

    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface, *input_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct me_param * curbe_param = (struct me_param *)param ;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    int i = 0;

    /* all scaled input surface stored in reconstructed_object*/
    obj_surface = encode_state->reconstructed_object;
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;


    switch (curbe_param->hme_type) {
    case INTEL_ENC_HME_4x : {
        /*memv output 4x*/
        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /*memv input 16x*/
        if (generic_state->b16xme_enabled) {
            gpe_resource = &avc_ctx->s16x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
        }
        /* brc distortion  output*/
        gpe_resource = &avc_ctx->res_brc_dist_data_surface;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_BRC_DISTORTION_INDEX);
        /* memv distortion output*/
        gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
        /*input current down scaled YUV surface*/
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_4x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
        /*input ref scaled YUV surface*/
        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            /* stop at the first invalid L0 reference */
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_4x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        /* current down scaled surface again, as the base of the L1 list */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_4x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            /* stop at the first invalid L1 reference */
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_4x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;

    }
    case INTEL_ENC_HME_16x : {
        /* memv output 16x */
        gpe_resource = &avc_ctx->s16x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /* memv input 32x, only when 32x HME runs first */
        if (generic_state->b32xme_enabled) {
            gpe_resource = &avc_ctx->s32x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
        }

        /* current 16x down scaled YUV surface */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_16x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

        /* 16x scaled L0 references */
        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_16x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        /* current 16x down scaled surface again, as the base of the L1 list */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_16x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        /* 16x scaled L1 references */
        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_16x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;
    }
    case INTEL_ENC_HME_32x : {
        /* memv output 32x; no coarser level feeds this one */
        gpe_resource = &avc_ctx->s32x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /* current 32x down scaled YUV surface */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_32x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

        /* 32x scaled L0 references */
        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_32x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        /* current 32x down scaled surface again, as the base of the L1 list */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_32x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        /* 32x scaled L1 references */
        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_32x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;
    }
    default:
        assert(0);

    }
}
4436
4437 static VAStatus
4438 gen9_avc_kernel_me(VADriverContextP ctx,
4439                    struct encode_state *encode_state,
4440                    struct intel_encoder_context *encoder_context,
4441                    int hme_type)
4442 {
4443     struct i965_driver_data *i965 = i965_driver_data(ctx);
4444     struct i965_gpe_table *gpe = &i965->gpe_table;
4445     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4446     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4447     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4448     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4449
4450     struct i965_gpe_context *gpe_context;
4451     struct gpe_media_object_walker_parameter media_object_walker_param;
4452     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4453     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4454     int media_function = 0;
4455     int kernel_idx = 0;
4456     struct me_param param ;
4457     unsigned int scale_factor = 0;
4458
4459     switch (hme_type) {
4460     case INTEL_ENC_HME_4x : {
4461         media_function = INTEL_MEDIA_STATE_4X_ME;
4462         scale_factor = 4;
4463         break;
4464     }
4465     case INTEL_ENC_HME_16x : {
4466         media_function = INTEL_MEDIA_STATE_16X_ME;
4467         scale_factor = 16;
4468         break;
4469     }
4470     case INTEL_ENC_HME_32x : {
4471         media_function = INTEL_MEDIA_STATE_32X_ME;
4472         scale_factor = 32;
4473         break;
4474     }
4475     default:
4476         assert(0);
4477
4478     }
4479
4480     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4481     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4482
4483     /* I frame should not come here.*/
4484     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4485     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4486
4487     gpe->context_init(ctx, gpe_context);
4488     gpe->reset_binding_table(ctx, gpe_context);
4489
4490     /*set curbe*/
4491     memset(&param, 0, sizeof(param));
4492     param.hme_type = hme_type;
4493     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
4494
4495     /*send surface*/
4496     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4497
4498     gpe->setup_interface_data(ctx, gpe_context);
4499
4500     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4501     /* the scaling is based on 8x8 blk level */
4502     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4503     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4504     kernel_walker_param.no_dependency = 1;
4505
4506     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4507
4508     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4509                                             gpe_context,
4510                                             media_function,
4511                                             &media_object_walker_param);
4512
4513     return VA_STATUS_SUCCESS;
4514 }
4515
4516 /*
4517 wp related function
4518 */
4519 static void
4520 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4521                       struct encode_state *encode_state,
4522                       struct i965_gpe_context *gpe_context,
4523                       struct intel_encoder_context *encoder_context,
4524                       void * param)
4525 {
4526     gen9_avc_wp_curbe_data *cmd;
4527     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4528     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4529     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4530     struct wp_param * curbe_param = (struct wp_param *)param;
4531
4532     cmd = i965_gpe_context_map_curbe(gpe_context);
4533
4534     if (!cmd)
4535         return;
4536     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
4537     if (curbe_param->ref_list_idx) {
4538         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4539         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4540     } else {
4541         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4542         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4543     }
4544
4545     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4546     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4547
4548     i965_gpe_context_unmap_curbe(gpe_context);
4549
4550 }
4551
4552 static void
4553 gen9_avc_send_surface_wp(VADriverContextP ctx,
4554                          struct encode_state *encode_state,
4555                          struct i965_gpe_context *gpe_context,
4556                          struct intel_encoder_context *encoder_context,
4557                          void * param)
4558 {
4559     struct i965_driver_data *i965 = i965_driver_data(ctx);
4560     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4561     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4562     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4563     struct wp_param * curbe_param = (struct wp_param *)param;
4564     struct object_surface *obj_surface;
4565     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4566     VASurfaceID surface_id;
4567
4568     if (curbe_param->ref_list_idx) {
4569         surface_id = slice_param->RefPicList1[0].picture_id;
4570         obj_surface = SURFACE(surface_id);
4571         if (!obj_surface || !obj_surface->private_data)
4572             avc_state->weighted_ref_l1_enable = 0;
4573         else
4574             avc_state->weighted_ref_l1_enable = 1;
4575     } else {
4576         surface_id = slice_param->RefPicList0[0].picture_id;
4577         obj_surface = SURFACE(surface_id);
4578         if (!obj_surface || !obj_surface->private_data)
4579             avc_state->weighted_ref_l0_enable = 0;
4580         else
4581             avc_state->weighted_ref_l0_enable = 1;
4582     }
4583     if (!obj_surface)
4584         obj_surface = encode_state->reference_objects[0];
4585
4586
4587     gen9_add_adv_gpe_surface(ctx, gpe_context,
4588                              obj_surface,
4589                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
4590
4591     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4592     gen9_add_adv_gpe_surface(ctx, gpe_context,
4593                              obj_surface,
4594                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
4595 }
4596
4597
4598 static VAStatus
4599 gen9_avc_kernel_wp(VADriverContextP ctx,
4600                    struct encode_state *encode_state,
4601                    struct intel_encoder_context *encoder_context,
4602                    unsigned int list1_in_use)
4603 {
4604     struct i965_driver_data *i965 = i965_driver_data(ctx);
4605     struct i965_gpe_table *gpe = &i965->gpe_table;
4606     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4607     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4608     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4609     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4610
4611     struct i965_gpe_context *gpe_context;
4612     struct gpe_media_object_walker_parameter media_object_walker_param;
4613     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4614     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4615     struct wp_param param;
4616
4617     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4618
4619     gpe->context_init(ctx, gpe_context);
4620     gpe->reset_binding_table(ctx, gpe_context);
4621
4622     memset(&param, 0, sizeof(param));
4623     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4624     /*set curbe*/
4625     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
4626
4627     /*send surface*/
4628     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4629
4630     gpe->setup_interface_data(ctx, gpe_context);
4631
4632     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4633     /* the scaling is based on 8x8 blk level */
4634     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4635     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4636     kernel_walker_param.no_dependency = 1;
4637
4638     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4639
4640     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4641                                             gpe_context,
4642                                             media_function,
4643                                             &media_object_walker_param);
4644
4645     return VA_STATUS_SUCCESS;
4646 }
4647
4648
4649 /*
4650 sfd related function
4651 */
/*
 * Fill the CURBE for the static frame detection (SFD) kernel.
 *
 * The detection thresholds (large/zero MV counts, minimum distortion)
 * are expressed per frame and derived from the MB count; the per-QP
 * cost table depends on the slice type (P or B).
 */
static void
gen9_avc_set_curbe_sfd(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct i965_gpe_context *gpe_context,
                       struct intel_encoder_context *encoder_context,
                       void * param)
{
    gen9_avc_sfd_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;
    memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));

    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
    cmd->dw0.stream_in_type = 7 ;
    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
    cmd->dw0.vdenc_mode_disable = 1 ;

    cmd->dw1.hme_stream_in_ref_cost = 5 ;
    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
    /* frame-level QP = picture init QP plus the first slice's delta */
    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;

    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;

    /* detection thresholds, scaled to the number of MBs per frame */
    cmd->dw3.large_mv_threshold = 128 ;
    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
    cmd->dw5.zmv_threshold = 4 ;
    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
    cmd->dw7.min_dist_threshold = 10 ;

    /* per-QP cost table, one entry per QP value, chosen by slice type */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));

    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
    }

    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
    /* binding-table slot indices consumed by the kernel */
    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;

    i965_gpe_context_unmap_curbe(gpe_context);

}
4709
4710 static void
4711 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4712                           struct encode_state *encode_state,
4713                           struct i965_gpe_context *gpe_context,
4714                           struct intel_encoder_context *encoder_context,
4715                           void * param)
4716 {
4717     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4718     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4719     struct i965_gpe_resource *gpe_resource;
4720     int size = 0;
4721
4722     /*HME mv data surface memv output 4x*/
4723     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4724     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4725                                    gpe_resource,
4726                                    1,
4727                                    I965_SURFACEFORMAT_R8_UNORM,
4728                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4729
4730     /* memv distortion */
4731     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4732     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4733                                    gpe_resource,
4734                                    1,
4735                                    I965_SURFACEFORMAT_R8_UNORM,
4736                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4737     /*buffer output*/
4738     size = 32 * 4 * 4;
4739     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4740     gen9_add_buffer_gpe_surface(ctx,
4741                                 gpe_context,
4742                                 gpe_resource,
4743                                 0,
4744                                 size / 4,
4745                                 0,
4746                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4747
4748 }
4749
4750 static VAStatus
4751 gen9_avc_kernel_sfd(VADriverContextP ctx,
4752                     struct encode_state *encode_state,
4753                     struct intel_encoder_context *encoder_context)
4754 {
4755     struct i965_driver_data *i965 = i965_driver_data(ctx);
4756     struct i965_gpe_table *gpe = &i965->gpe_table;
4757     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4758     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4759     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4760
4761     struct i965_gpe_context *gpe_context;
4762     struct gpe_media_object_parameter media_object_param;
4763     struct gpe_media_object_inline_data media_object_inline_data;
4764     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4765     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4766
4767     gpe->context_init(ctx, gpe_context);
4768     gpe->reset_binding_table(ctx, gpe_context);
4769
4770     /*set curbe*/
4771     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4772
4773     /*send surface*/
4774     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4775
4776     gpe->setup_interface_data(ctx, gpe_context);
4777
4778     memset(&media_object_param, 0, sizeof(media_object_param));
4779     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4780     media_object_param.pinline_data = &media_object_inline_data;
4781     media_object_param.inline_size = sizeof(media_object_inline_data);
4782
4783     gen9_avc_run_kernel_media_object(ctx, encoder_context,
4784                                      gpe_context,
4785                                      media_function,
4786                                      &media_object_param);
4787
4788     return VA_STATUS_SUCCESS;
4789 }
4790
4791 /*
4792 kernel related function:init/destroy etc
4793 */
4794 static void
4795 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4796                              struct generic_encoder_context *generic_context,
4797                              struct gen_avc_scaling_context *kernel_context)
4798 {
4799     struct i965_driver_data *i965 = i965_driver_data(ctx);
4800     struct i965_gpe_table *gpe = &i965->gpe_table;
4801     struct i965_gpe_context *gpe_context = NULL;
4802     struct encoder_kernel_parameter kernel_param ;
4803     struct encoder_scoreboard_parameter scoreboard_param;
4804     struct i965_kernel common_kernel;
4805
4806     if (IS_SKL(i965->intel.device_info) ||
4807         IS_BXT(i965->intel.device_info)) {
4808         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4809         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4810     } else if (IS_KBL(i965->intel.device_info) ||
4811                IS_GLK(i965->intel.device_info)) {
4812         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4813         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4814     }
4815
4816     /* 4x scaling kernel*/
4817     kernel_param.sampler_size = 0;
4818
4819     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4820     scoreboard_param.mask = 0xFF;
4821     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4822     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4823     scoreboard_param.walkpat_flag = 0;
4824
4825     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4826     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4827     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4828
4829     memset(&common_kernel, 0, sizeof(common_kernel));
4830
4831     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4832                                          generic_context->enc_kernel_size,
4833                                          INTEL_GENERIC_ENC_SCALING4X,
4834                                          0,
4835                                          &common_kernel);
4836
4837     gpe->load_kernels(ctx,
4838                       gpe_context,
4839                       &common_kernel,
4840                       1);
4841
4842     /*2x scaling kernel*/
4843     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4844     kernel_param.inline_data_size = 0;
4845     kernel_param.sampler_size = 0;
4846
4847     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4848     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4849     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4850
4851     memset(&common_kernel, 0, sizeof(common_kernel));
4852
4853     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4854                                          generic_context->enc_kernel_size,
4855                                          INTEL_GENERIC_ENC_SCALING2X,
4856                                          0,
4857                                          &common_kernel);
4858
4859     gpe->load_kernels(ctx,
4860                       gpe_context,
4861                       &common_kernel,
4862                       1);
4863
4864 }
4865
4866 static void
4867 gen9_avc_kernel_init_me(VADriverContextP ctx,
4868                         struct generic_encoder_context *generic_context,
4869                         struct gen_avc_me_context *kernel_context)
4870 {
4871     struct i965_driver_data *i965 = i965_driver_data(ctx);
4872     struct i965_gpe_table *gpe = &i965->gpe_table;
4873     struct i965_gpe_context *gpe_context = NULL;
4874     struct encoder_kernel_parameter kernel_param ;
4875     struct encoder_scoreboard_parameter scoreboard_param;
4876     struct i965_kernel common_kernel;
4877     int i = 0;
4878
4879     kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4880     kernel_param.inline_data_size = 0;
4881     kernel_param.sampler_size = 0;
4882
4883     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4884     scoreboard_param.mask = 0xFF;
4885     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4886     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4887     scoreboard_param.walkpat_flag = 0;
4888
4889     for (i = 0; i < 2; i++) {
4890         gpe_context = &kernel_context->gpe_contexts[i];
4891         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4892         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4893
4894         memset(&common_kernel, 0, sizeof(common_kernel));
4895
4896         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4897                                              generic_context->enc_kernel_size,
4898                                              INTEL_GENERIC_ENC_ME,
4899                                              i,
4900                                              &common_kernel);
4901
4902         gpe->load_kernels(ctx,
4903                           gpe_context,
4904                           &common_kernel,
4905                           1);
4906     }
4907
4908 }
4909
4910 static void
4911 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4912                            struct generic_encoder_context *generic_context,
4913                            struct gen_avc_mbenc_context *kernel_context)
4914 {
4915     struct i965_driver_data *i965 = i965_driver_data(ctx);
4916     struct i965_gpe_table *gpe = &i965->gpe_table;
4917     struct i965_gpe_context *gpe_context = NULL;
4918     struct encoder_kernel_parameter kernel_param ;
4919     struct encoder_scoreboard_parameter scoreboard_param;
4920     struct i965_kernel common_kernel;
4921     int i = 0;
4922     unsigned int curbe_size = 0;
4923
4924     if (IS_SKL(i965->intel.device_info) ||
4925         IS_BXT(i965->intel.device_info)) {
4926         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4927     } else if (IS_KBL(i965->intel.device_info) ||
4928                IS_GLK(i965->intel.device_info)) {
4929         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
4930     }
4931
4932     assert(curbe_size > 0);
4933     kernel_param.curbe_size = curbe_size;
4934     kernel_param.inline_data_size = 0;
4935     kernel_param.sampler_size = 0;
4936
4937     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4938     scoreboard_param.mask = 0xFF;
4939     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4940     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4941     scoreboard_param.walkpat_flag = 0;
4942
4943     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4944         gpe_context = &kernel_context->gpe_contexts[i];
4945         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4946         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4947
4948         memset(&common_kernel, 0, sizeof(common_kernel));
4949
4950         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4951                                              generic_context->enc_kernel_size,
4952                                              INTEL_GENERIC_ENC_MBENC,
4953                                              i,
4954                                              &common_kernel);
4955
4956         gpe->load_kernels(ctx,
4957                           gpe_context,
4958                           &common_kernel,
4959                           1);
4960     }
4961
4962 }
4963
4964 static void
4965 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4966                          struct generic_encoder_context *generic_context,
4967                          struct gen_avc_brc_context *kernel_context)
4968 {
4969     struct i965_driver_data *i965 = i965_driver_data(ctx);
4970     struct i965_gpe_table *gpe = &i965->gpe_table;
4971     struct i965_gpe_context *gpe_context = NULL;
4972     struct encoder_kernel_parameter kernel_param ;
4973     struct encoder_scoreboard_parameter scoreboard_param;
4974     struct i965_kernel common_kernel;
4975     int i = 0;
4976
4977     const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4978         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4979         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4980         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4981         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
4982         0,
4983         (sizeof(gen9_avc_mb_brc_curbe_data))
4984     };
4985
4986     kernel_param.inline_data_size = 0;
4987     kernel_param.sampler_size = 0;
4988
4989     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4990     scoreboard_param.mask = 0xFF;
4991     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4992     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4993     scoreboard_param.walkpat_flag = 0;
4994
4995     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4996         kernel_param.curbe_size = brc_curbe_size[i];
4997         gpe_context = &kernel_context->gpe_contexts[i];
4998         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4999         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5000
5001         memset(&common_kernel, 0, sizeof(common_kernel));
5002
5003         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5004                                              generic_context->enc_kernel_size,
5005                                              INTEL_GENERIC_ENC_BRC,
5006                                              i,
5007                                              &common_kernel);
5008
5009         gpe->load_kernels(ctx,
5010                           gpe_context,
5011                           &common_kernel,
5012                           1);
5013     }
5014
5015 }
5016
5017 static void
5018 gen9_avc_kernel_init_wp(VADriverContextP ctx,
5019                         struct generic_encoder_context *generic_context,
5020                         struct gen_avc_wp_context *kernel_context)
5021 {
5022     struct i965_driver_data *i965 = i965_driver_data(ctx);
5023     struct i965_gpe_table *gpe = &i965->gpe_table;
5024     struct i965_gpe_context *gpe_context = NULL;
5025     struct encoder_kernel_parameter kernel_param ;
5026     struct encoder_scoreboard_parameter scoreboard_param;
5027     struct i965_kernel common_kernel;
5028
5029     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
5030     kernel_param.inline_data_size = 0;
5031     kernel_param.sampler_size = 0;
5032
5033     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5034     scoreboard_param.mask = 0xFF;
5035     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5036     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5037     scoreboard_param.walkpat_flag = 0;
5038
5039     gpe_context = &kernel_context->gpe_contexts;
5040     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5041     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5042
5043     memset(&common_kernel, 0, sizeof(common_kernel));
5044
5045     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5046                                          generic_context->enc_kernel_size,
5047                                          INTEL_GENERIC_ENC_WP,
5048                                          0,
5049                                          &common_kernel);
5050
5051     gpe->load_kernels(ctx,
5052                       gpe_context,
5053                       &common_kernel,
5054                       1);
5055
5056 }
5057
5058 static void
5059 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5060                          struct generic_encoder_context *generic_context,
5061                          struct gen_avc_sfd_context *kernel_context)
5062 {
5063     struct i965_driver_data *i965 = i965_driver_data(ctx);
5064     struct i965_gpe_table *gpe = &i965->gpe_table;
5065     struct i965_gpe_context *gpe_context = NULL;
5066     struct encoder_kernel_parameter kernel_param ;
5067     struct encoder_scoreboard_parameter scoreboard_param;
5068     struct i965_kernel common_kernel;
5069
5070     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5071     kernel_param.inline_data_size = 0;
5072     kernel_param.sampler_size = 0;
5073
5074     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5075     scoreboard_param.mask = 0xFF;
5076     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5077     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5078     scoreboard_param.walkpat_flag = 0;
5079
5080     gpe_context = &kernel_context->gpe_contexts;
5081     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5082     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5083
5084     memset(&common_kernel, 0, sizeof(common_kernel));
5085
5086     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5087                                          generic_context->enc_kernel_size,
5088                                          INTEL_GENERIC_ENC_SFD,
5089                                          0,
5090                                          &common_kernel);
5091
5092     gpe->load_kernels(ctx,
5093                       gpe_context,
5094                       &common_kernel,
5095                       1);
5096
5097 }
5098
5099 static void
5100 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5101 {
5102
5103     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5104     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5105     struct i965_gpe_table *gpe = &i965->gpe_table;
5106
5107     int i = 0;
5108
5109     gen9_avc_free_resources(vme_context);
5110
5111     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5112         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5113
5114     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5115         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5116
5117     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5118         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5119
5120     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5121         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
5122
5123     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5124
5125     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
5126
5127 }
5128
5129 /*
5130 vme pipeline
5131 */
5132 static void
5133 gen9_avc_update_parameters(VADriverContextP ctx,
5134                            VAProfile profile,
5135                            struct encode_state *encode_state,
5136                            struct intel_encoder_context *encoder_context)
5137 {
5138     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5139     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5140     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5141     VAEncSequenceParameterBufferH264 *seq_param;
5142     VAEncSliceParameterBufferH264 *slice_param;
5143     int i, j, slice_index;
5144     unsigned int preset = generic_state->preset;
5145
5146     /* seq/pic/slice parameter setting */
5147     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5148     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5149
5150     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5151     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
5152
5153     avc_state->slice_num = 0;
5154     slice_index = 0;
5155     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
5156         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5157         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5158             avc_state->slice_param[slice_index] = slice_param;
5159             slice_param++;
5160             slice_index++;
5161             avc_state->slice_num++;
5162         }
5163     }
5164
5165     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5166     seq_param = avc_state->seq_param;
5167     slice_param = avc_state->slice_param[0];
5168
5169     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5170
5171     if (slice_param->slice_type == SLICE_TYPE_I ||
5172         slice_param->slice_type == SLICE_TYPE_SI)
5173         generic_state->frame_type = SLICE_TYPE_I;
5174     else if (slice_param->slice_type == SLICE_TYPE_P)
5175         generic_state->frame_type = SLICE_TYPE_P;
5176     else if (slice_param->slice_type == SLICE_TYPE_B)
5177         generic_state->frame_type = SLICE_TYPE_B;
5178     if (profile == VAProfileH264High)
5179         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
5180     else
5181         avc_state->transform_8x8_mode_enable = 0;
5182
5183     /* rc init*/
5184     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5185         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5186         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5187         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5188         generic_state->frames_per_100s = 3000; /* 30fps */
5189     }
5190
5191     generic_state->gop_size = seq_param->intra_period;
5192     generic_state->gop_ref_distance = seq_param->ip_period;
5193
5194     if (generic_state->internal_rate_mode == VA_RC_CBR) {
5195         generic_state->max_bit_rate = generic_state->target_bit_rate;
5196         generic_state->min_bit_rate = generic_state->target_bit_rate;
5197     }
5198
5199     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
5200         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5201     }
5202
5203     generic_state->preset = encoder_context->quality_level;
5204     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
5205         generic_state->preset = INTEL_PRESET_RT_SPEED;
5206     }
5207     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5208
5209     if (!generic_state->brc_inited) {
5210         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5211         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5212         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5213         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5214     }
5215
5216
5217     generic_state->curr_pak_pass = 0;
5218     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5219
5220     if (generic_state->internal_rate_mode == VA_RC_CBR ||
5221         generic_state->internal_rate_mode == VA_RC_VBR)
5222         generic_state->brc_enabled = 1;
5223     else
5224         generic_state->brc_enabled = 0;
5225
5226     if (generic_state->brc_enabled &&
5227         (!generic_state->init_vbv_buffer_fullness_in_bit ||
5228          !generic_state->vbv_buffer_size_in_bit ||
5229          !generic_state->max_bit_rate ||
5230          !generic_state->target_bit_rate ||
5231          !generic_state->frames_per_100s)) {
5232         WARN_ONCE("Rate control parameter is required for BRC\n");
5233         generic_state->brc_enabled = 0;
5234     }
5235
5236     if (!generic_state->brc_enabled) {
5237         generic_state->target_bit_rate = 0;
5238         generic_state->max_bit_rate = 0;
5239         generic_state->min_bit_rate = 0;
5240         generic_state->init_vbv_buffer_fullness_in_bit = 0;
5241         generic_state->vbv_buffer_size_in_bit = 0;
5242         generic_state->num_pak_passes = 1;
5243     } else {
5244         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5245     }
5246
5247
5248     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5249     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5250     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5251     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5252
5253     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
5254     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
5255     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
5256     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
5257
5258     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
5259     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
5260     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
5261     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
5262
5263     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
5264     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
5265     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
5266     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
5267
5268     if (generic_state->hme_supported) {
5269         generic_state->hme_enabled = 1;
5270     } else {
5271         generic_state->hme_enabled = 0;
5272     }
5273
5274     if (generic_state->b16xme_supported) {
5275         generic_state->b16xme_enabled = 1;
5276     } else {
5277         generic_state->b16xme_enabled = 0;
5278     }
5279
5280     if (generic_state->b32xme_supported) {
5281         generic_state->b32xme_enabled = 1;
5282     } else {
5283         generic_state->b32xme_enabled = 0;
5284     }
5285     /* disable HME/16xME if the size is too small */
5286     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5287         generic_state->b32xme_supported = 0;
5288         generic_state->b32xme_enabled = 0;
5289         generic_state->b16xme_supported = 0;
5290         generic_state->b16xme_enabled = 0;
5291         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5292         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5293     }
5294     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5295         generic_state->b32xme_supported = 0;
5296         generic_state->b32xme_enabled = 0;
5297         generic_state->b16xme_supported = 0;
5298         generic_state->b16xme_enabled = 0;
5299         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5300         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5301     }
5302
5303     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5304         generic_state->b32xme_supported = 0;
5305         generic_state->b32xme_enabled = 0;
5306         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5307         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5308     }
5309     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5310         generic_state->b32xme_supported = 0;
5311         generic_state->b32xme_enabled = 0;
5312         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5313         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5314     }
5315
5316     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5317         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5318         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5319     }
5320     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5321         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5322         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5323     }
5324
5325 }
5326
5327 static VAStatus
5328 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5329                                 struct encode_state *encode_state,
5330                                 struct intel_encoder_context *encoder_context)
5331 {
5332     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5333     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5334     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5335     unsigned int rate_control_mode = encoder_context->rate_control_mode;
5336     unsigned int preset = generic_state->preset;
5337     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5338     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5339     int i = 0;
5340     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5341     /*avbr init*/
5342     generic_state->avbr_curracy = 30;
5343     generic_state->avbr_convergence = 150;
5344
5345     switch (rate_control_mode & 0x7f) {
5346     case VA_RC_CBR:
5347         generic_state->internal_rate_mode = VA_RC_CBR;
5348         break;
5349
5350     case VA_RC_VBR:
5351         generic_state->internal_rate_mode = VA_RC_VBR;
5352         break;
5353
5354     case VA_RC_CQP:
5355     default:
5356         generic_state->internal_rate_mode = VA_RC_CQP;
5357         break;
5358     }
5359
5360     if (rate_control_mode != VA_RC_NONE &&
5361         rate_control_mode != VA_RC_CQP) {
5362         generic_state->brc_enabled = 1;
5363         generic_state->brc_distortion_buffer_supported = 1;
5364         generic_state->brc_constant_buffer_supported = 1;
5365         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5366     }
5367
5368     /*check brc parameter*/
5369     if (generic_state->brc_enabled) {
5370         avc_state->mb_qp_data_enable = 0;
5371     }
5372
5373     /*set the brc init and reset accordingly*/
5374     if (generic_state->brc_need_reset &&
5375         (generic_state->brc_distortion_buffer_supported == 0 ||
5376          rate_control_mode == VA_RC_CQP)) {
5377         generic_state->brc_need_reset = 0;// not support by CQP
5378     }
5379
5380     if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
5381         avc_state->sfd_enable = 0;
5382     }
5383
5384     if (generic_state->frames_per_window_size == 0) {
5385         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5386     } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
5387         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5388     }
5389
5390     if (generic_state->brc_enabled) {
5391         generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5392         if (avc_state->min_max_qp_enable) {
5393             generic_state->num_pak_passes = 1;
5394         }
5395         generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5396         generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5397     } else {
5398         generic_state->num_pak_passes = 1;// CQP only one pass
5399     }
5400
5401     avc_state->mbenc_i_frame_dist_in_use = 0;
5402     avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5403
5404     /*ROI must enable mbbrc.*/
5405
5406     /*CAD check*/
5407     if (avc_state->caf_supported) {
5408         switch (generic_state->frame_type) {
5409         case SLICE_TYPE_I:
5410             break;
5411         case SLICE_TYPE_P:
5412             avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5413             break;
5414         case SLICE_TYPE_B:
5415             avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5416             break;
5417         }
5418
5419         if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
5420             if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5421                 avc_state->caf_enable = 0;
5422         }
5423     }
5424
5425     avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
5426
5427     /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5428     if (avc_state->flatness_check_supported) {
5429         avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5430     } else {
5431         avc_state->flatness_check_enable = 0;
5432     }
5433
5434     /* check mb_status_supported/enbale*/
5435     if (avc_state->adaptive_transform_decision_enable) {
5436         avc_state->mb_status_enable = 1;
5437     } else {
5438         avc_state->mb_status_enable = 0;
5439     }
5440     /*slice check,all the slices use the same slice height except the last slice*/
5441     avc_state->arbitrary_num_mbs_in_slice = 0;
5442     for (i = 0; i < avc_state->slice_num; i++) {
5443         if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
5444             avc_state->arbitrary_num_mbs_in_slice = 1;
5445             avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
5446         } else {
5447             avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
5448         }
5449     }
5450
5451     if (generic_state->frame_type == SLICE_TYPE_I) {
5452         generic_state->hme_enabled = 0;
5453         generic_state->b16xme_enabled = 0;
5454         generic_state->b32xme_enabled = 0;
5455     }
5456
5457     if (generic_state->frame_type == SLICE_TYPE_B) {
5458         gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
5459         avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
5460     }
5461
5462     /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5463     avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5464                                              && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5465
5466     if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
5467         avc_state->tq_enable = 1;
5468         avc_state->tq_rounding = 6;
5469         if (generic_state->brc_enabled) {
5470             generic_state->mb_brc_enabled = 1;
5471         }
5472     }
5473
5474     //check the inter rounding
5475     avc_state->rounding_value = 0;
5476     avc_state->rounding_inter_p = 255;//default
5477     avc_state->rounding_inter_b = 255; //default
5478     avc_state->rounding_inter_b_ref = 255; //default
5479
5480     if (generic_state->frame_type == SLICE_TYPE_P) {
5481         if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
5482             if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
5483                 if (generic_state->gop_ref_distance == 1)
5484                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5485                 else
5486                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5487             } else {
5488                 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5489             }
5490
5491         } else {
5492             avc_state->rounding_value = avc_state->rounding_inter_p;
5493         }
5494     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5495         if (pic_param->pic_fields.bits.reference_pic_flag) {
5496             if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5497                 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5498             else
5499                 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5500         } else {
5501             if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
5502                 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5503                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5504                 else
5505                     avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5506             } else {
5507                 avc_state->rounding_value = avc_state->rounding_inter_b;
5508             }
5509         }
5510     }
5511     return VA_STATUS_SUCCESS;
5512 }
5513
/*
 * Prepare all per-frame GPE resources used by the VME (ENC) kernel passes:
 *  - reconstructed and raw input surfaces (re-bound as 2D GPE resources),
 *  - the direct-MV (DMV) buffers for the current frame and every reference,
 *  - the reference picture list resources with their POC bookkeeping,
 *  - the coded (output) bitstream buffer and its status-report segment,
 *  - the L0/L1 reference index mapping (slice RefPicList -> reference_objects).
 *
 * Returns VA_STATUS_SUCCESS on success, or the first failing VA status code.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Make sure the reconstructed surface carries the per-surface AVC private
     * data (DMV buffers etc.) sized for the current frame dimensions. */
    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface,frame_store_id,qp_value*/
        /* The last two DMV buffer slots (NUM_MFC_AVC_DMV_BUFFERS - 2 / - 1) are
         * reserved for the current frame's top/bottom MV data. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    /* Rebind the reconstructed surface as a 2D GPE resource for the kernels. */
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Slots i*2 / i*2+1 hold the top/bottom DMV buffers of reference i; the
     * loop stops at the first empty entry in reference_objects. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream ?*/
    /* The coded buffer starts after the i965 coded-buffer header; the last
     * 0x1000 bytes are kept out of the usable PAK-BSE range. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* Clear the status-report area that the PAK pass will fill in later. */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Active reference counts come from the picture parameters, optionally
     * overridden per-slice via num_ref_idx_active_override_flag. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its index in reference_objects by
     * matching surface IDs; unmatched/unused slots stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (only populated for B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
5710
5711 static VAStatus
5712 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5713                              struct encode_state *encode_state,
5714                              struct intel_encoder_context *encoder_context)
5715 {
5716     return VA_STATUS_SUCCESS;
5717 }
5718
5719 static VAStatus
5720 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5721                               struct encode_state *encode_state,
5722                               struct intel_encoder_context *encoder_context)
5723 {
5724
5725     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5726     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5727     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5728
5729     /*set this flag when all kernel is finished*/
5730     if (generic_state->brc_enabled) {
5731         generic_state->brc_inited = 1;
5732         generic_state->brc_need_reset = 0;
5733         avc_state->mbenc_curbe_set_in_brc_update = 0;
5734     }
5735     return VA_STATUS_SUCCESS;
5736 }
5737
/*
 * Dispatch all VME (ENC) GPE kernels for the current frame in their required
 * order: BRC init/reset, down-scaling, hierarchical ME (32x -> 16x -> 4x),
 * static frame detection, BRC frame/MB update, weighted prediction, and
 * finally MbEnc. The ordering between stages is a hard dependency (e.g. BRC
 * init must precede HME because it resets the BRC distortion surface).
 *
 * Returns VA_STATUS_SUCCESS (the kernel dispatch helpers report no status).
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling*/
    /* Each scaling level feeds the next: 16x needs the 4x output, 32x needs 16x. */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel*/
    /* ME runs coarsest-first: each level's MVs seed the next finer level. */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* Optional I-frame distortion pre-pass of MbEnc feeds the BRC update. */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction,disable by now */
    /* If the app requested explicit WP but supplied no weight tables, the
     * corresponding pic_param flag is cleared here as a fallback. */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
5828
5829 static VAStatus
5830 gen9_avc_vme_pipeline(VADriverContextP ctx,
5831                       VAProfile profile,
5832                       struct encode_state *encode_state,
5833                       struct intel_encoder_context *encoder_context)
5834 {
5835     VAStatus va_status;
5836
5837     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5838
5839     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5840     if (va_status != VA_STATUS_SUCCESS)
5841         return va_status;
5842
5843     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5844     if (va_status != VA_STATUS_SUCCESS)
5845         return va_status;
5846
5847     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5848     if (va_status != VA_STATUS_SUCCESS)
5849         return va_status;
5850
5851     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5852     if (va_status != VA_STATUS_SUCCESS)
5853         return va_status;
5854
5855     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5856     if (va_status != VA_STATUS_SUCCESS)
5857         return va_status;
5858
5859     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5860
5861     return VA_STATUS_SUCCESS;
5862 }
5863
5864 static void
5865 gen9_avc_vme_context_destroy(void * context)
5866 {
5867     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5868     struct generic_encoder_context *generic_ctx;
5869     struct i965_avc_encoder_context *avc_ctx;
5870     struct generic_enc_codec_state *generic_state;
5871     struct avc_enc_state *avc_state;
5872
5873     if (!vme_context)
5874         return;
5875
5876     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5877     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5878     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5879     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5880
5881     gen9_avc_kernel_destroy(vme_context);
5882
5883     free(generic_ctx);
5884     free(avc_ctx);
5885     free(generic_state);
5886     free(avc_state);
5887     free(vme_context);
5888     return;
5889
5890 }
5891
5892 static void
5893 gen9_avc_kernel_init(VADriverContextP ctx,
5894                      struct intel_encoder_context *encoder_context)
5895 {
5896     struct i965_driver_data *i965 = i965_driver_data(ctx);
5897     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5898     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5899     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5900
5901     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
5902     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
5903     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
5904     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
5905     gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
5906     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
5907
5908     //function pointer
5909     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5910     generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5911     generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5912     generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5913     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5914     generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5915     generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5916     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5917     generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
5918
5919     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5920     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5921     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5922     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5923     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5924     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5925     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5926     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
5927
5928     if (IS_SKL(i965->intel.device_info) ||
5929         IS_BXT(i965->intel.device_info))
5930         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5931     else if (IS_KBL(i965->intel.device_info) ||
5932              IS_GLK(i965->intel.device_info))
5933         generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5934
5935 }
5936
5937 /*
5938 PAK pipeline related function
5939 */
5940 extern int
5941 intel_avc_enc_slice_type_fixup(int slice_type);
5942
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs) to put the MFX engine in AVC encode mode
 * (long format, non-VDEnc). Stream-out is enabled on every PAK pass except
 * the last one — presumably so intermediate passes of multi-pass BRC can be
 * re-encoded; confirm against the BRC pass logic.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
5983
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing one NV12 (planar 4:2:0,
 * interleaved U/V, Y-major tiled) surface identified by `id` for the PAK
 * stage. Width/height/pitch are programmed as value-minus-one per the
 * command layout.
 *
 * NOTE(review): DW5 (the Cr offset field) is programmed with the same
 * y_cb_offset as DW4; for NV12 the chroma plane is interleaved so a separate
 * Cr offset is presumably unused by hardware — confirm against the MFX PRM.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
}
6015
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs): program the pre/post-deblocking
 * outputs, the raw input surface, the PAK MB status buffer (both its write
 * and read slots), row-store scratch buffers and the 16 reference picture
 * entries. Unused slots (ILDB, memory compression, DW62-64) are zeroed.
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    /* Same buffer as the DW10-12 write slot: PAK reads back what it wrote. */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6073
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): program the indirect MV object
 * base (the per-frame MV data surface attached to the reconstructed surface)
 * and the PAK-BSE base/upper-bound pointing at the compressed bitstream
 * buffer. Bitstream offset, IT-COFF and DBLK slots are unused on encode.
 * Bails out silently if the reconstructed surface carries no AVC private
 * data.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* 32 DWs (128 bytes) of MV data per macroblock; the page-aligned size is
     * used as the indirect object's upper bound. */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
6138
/*
 * Program MFX_BSP_BUF_BASE_ADDR_STATE: provide the BSD/MPC row-store
 * scratch buffer required by the bit-stream processing unit.  The MPR
 * row-store and bitplane buffers are decode-only and left unset.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6162
/*
 * Program MFX_AVC_DIRECTMODE_STATE: direct-MV buffers for all reference
 * frames plus the write buffer for the current frame, followed by the POC
 * list the hardware uses for temporal direct-mode scaling.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference; each entry is a
     * 64-bit address (two DWs), hence the step of 2 */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* no buffer allocated for this slot: emit a null address */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory attributes for the reference MV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by the hardware,
     * hence the write domain) */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: 32 reference entries, then the two entries for the current
     * frame.  NOTE(review): only top_field_poc is programmed for every
     * slot -- presumably frame (progressive) encoding only; confirm if
     * interlaced support is added. */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
6210
6211 static void
6212 gen9_mfc_qm_state(VADriverContextP ctx,
6213                   int qm_type,
6214                   const unsigned int *qm,
6215                   int qm_length,
6216                   struct intel_encoder_context *encoder_context)
6217 {
6218     struct intel_batchbuffer *batch = encoder_context->base.batch;
6219     unsigned int qm_buffer[16];
6220
6221     assert(qm_length <= 16);
6222     assert(sizeof(*qm) == 4);
6223     memset(qm_buffer, 0, 16 * 4);
6224     memcpy(qm_buffer, qm, qm_length * 4);
6225
6226     BEGIN_BCS_BATCH(batch, 18);
6227     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6228     OUT_BCS_BATCH(batch, qm_type << 0);
6229     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6230     ADVANCE_BCS_BATCH(batch);
6231 }
6232
6233 static void
6234 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6235                       struct encode_state *encode_state,
6236                       struct intel_encoder_context *encoder_context)
6237 {
6238     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6239     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6240     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6241     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6242
6243
6244     const unsigned int *qm_4x4_intra;
6245     const unsigned int *qm_4x4_inter;
6246     const unsigned int *qm_8x8_intra;
6247     const unsigned int *qm_8x8_inter;
6248
6249     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6250         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6251         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6252     } else {
6253         VAIQMatrixBufferH264 *qm;
6254         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6255         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6256         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6257         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6258         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6259         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6260     }
6261
6262     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6263     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6264     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6265     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
6266 }
6267
6268 static void
6269 gen9_mfc_fqm_state(VADriverContextP ctx,
6270                    int fqm_type,
6271                    const unsigned int *fqm,
6272                    int fqm_length,
6273                    struct intel_encoder_context *encoder_context)
6274 {
6275     struct intel_batchbuffer *batch = encoder_context->base.batch;
6276     unsigned int fqm_buffer[32];
6277
6278     assert(fqm_length <= 32);
6279     assert(sizeof(*fqm) == 4);
6280     memset(fqm_buffer, 0, 32 * 4);
6281     memcpy(fqm_buffer, fqm, fqm_length * 4);
6282
6283     BEGIN_BCS_BATCH(batch, 34);
6284     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6285     OUT_BCS_BATCH(batch, fqm_type << 0);
6286     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6287     ADVANCE_BCS_BATCH(batch);
6288 }
6289
/*
 * Build a forward-quantizer matrix from a len x len quantizer matrix:
 * each output element is the unsigned 0.16 fixed-point reciprocal of the
 * corresponding input element, with the matrix transposed on the way.
 * A zero quantizer value is invalid (division by zero) and is asserted.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q);
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
6300
/*
 * Program the four AVC forward-quantizer matrices.  Flat matrices are
 * used unless the SPS or PPS signals a scaling matrix; otherwise each FQM
 * is derived from the application-supplied scaling lists by
 * gen9_mfc_fill_fqm() (transposed 0.16 fixed-point reciprocals).
 */
static void
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        /* scratch buffer, reused for each of the four commands:
         * 3 x (4x4) lists of uint16 = 24 DWs, one 8x8 list = 32 DWs */
        uint32_t fqm[32];
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* intra 4x4: luma + both chroma lists */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* inter 4x4: lists 3..5 */
        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
6339
6340 static void
6341 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6342                            struct intel_encoder_context *encoder_context,
6343                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6344                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6345                            int slice_header_indicator,
6346                            struct intel_batchbuffer *batch)
6347 {
6348     if (data_bits_in_last_dw == 0)
6349         data_bits_in_last_dw = 32;
6350
6351     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6352
6353     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6354     OUT_BCS_BATCH(batch,
6355                   (0 << 16) |   /* always start at offset 0 */
6356                   (slice_header_indicator << 14) |
6357                   (data_bits_in_last_dw << 8) |
6358                   (skip_emul_byte_count << 4) |
6359                   (!!emulation_flag << 3) |
6360                   ((!!is_last_header) << 2) |
6361                   ((!!is_end_of_slice) << 1) |
6362                   (0 << 0));    /* check this flag */
6363     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6364
6365     ADVANCE_BCS_BATCH(batch);
6366 }
6367
/*
 * Scan the packed raw data attached to the first slice and, if one of the
 * entries is an access-unit delimiter NAL, insert it into the bitstream.
 * At most one AUD is emitted (the loop stops at the first match).
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    /* raw packed buffers attached to slice 0 */
    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* the NAL header byte sits right after the start code; check its type */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
6411
/*
 * Insert the packed data associated with one slice.
 *
 * Raw packed buffers are emitted first (skipping any AUD, already handled
 * elsewhere, and the slice header itself).  The slice header always goes
 * last: either the application-supplied packed slice header, or one
 * generated by the driver when none was provided.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* index 0 means "no packed slice header supplied by the app" */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        /* last-header=1, emulation handled by hardware,
         * slice_header_indicator=1 so PAK patches the header fields */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header() allocates the buffer; release it here */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
6518
6519 static void
6520 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6521                            struct encode_state *encode_state,
6522                            struct intel_encoder_context *encoder_context,
6523                            VAEncSliceParameterBufferH264 *slice_param,
6524                            int slice_index,
6525                            struct intel_batchbuffer *batch)
6526 {
6527     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6528     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6529     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6530     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6531     unsigned int skip_emul_byte_cnt;
6532
6533     if (slice_index == 0) {
6534
6535         /* if AUD exist and insert it firstly */
6536         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
6537
6538         if (encode_state->packed_header_data[idx]) {
6539             VAEncPackedHeaderParameterBuffer *param = NULL;
6540             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6541             unsigned int length_in_bits;
6542
6543             assert(encode_state->packed_header_param[idx]);
6544             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6545             length_in_bits = param->bit_length;
6546
6547             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6548             gen9_mfc_avc_insert_object(ctx,
6549                                        encoder_context,
6550                                        header_data,
6551                                        ALIGN(length_in_bits, 32) >> 5,
6552                                        length_in_bits & 0x1f,
6553                                        skip_emul_byte_cnt,
6554                                        0,
6555                                        0,
6556                                        !param->has_emulation_bytes,
6557                                        0,
6558                                        batch);
6559         }
6560
6561         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6562
6563         if (encode_state->packed_header_data[idx]) {
6564             VAEncPackedHeaderParameterBuffer *param = NULL;
6565             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6566             unsigned int length_in_bits;
6567
6568             assert(encode_state->packed_header_param[idx]);
6569             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6570             length_in_bits = param->bit_length;
6571
6572             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6573
6574             gen9_mfc_avc_insert_object(ctx,
6575                                        encoder_context,
6576                                        header_data,
6577                                        ALIGN(length_in_bits, 32) >> 5,
6578                                        length_in_bits & 0x1f,
6579                                        skip_emul_byte_cnt,
6580                                        0,
6581                                        0,
6582                                        !param->has_emulation_bytes,
6583                                        0,
6584                                        batch);
6585         }
6586
6587         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6588
6589         if (encode_state->packed_header_data[idx]) {
6590             VAEncPackedHeaderParameterBuffer *param = NULL;
6591             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6592             unsigned int length_in_bits;
6593
6594             assert(encode_state->packed_header_param[idx]);
6595             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6596             length_in_bits = param->bit_length;
6597
6598             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6599             gen9_mfc_avc_insert_object(ctx,
6600                                        encoder_context,
6601                                        header_data,
6602                                        ALIGN(length_in_bits, 32) >> 5,
6603                                        length_in_bits & 0x1f,
6604                                        skip_emul_byte_cnt,
6605                                        0,
6606                                        0,
6607                                        !param->has_emulation_bytes,
6608                                        0,
6609                                        batch);
6610         } else if (internal_rate_mode == VA_RC_CBR) {
6611             /* insert others */
6612         }
6613     }
6614
6615     gen9_mfc_avc_insert_slice_packed_data(ctx,
6616                                           encode_state,
6617                                           encoder_context,
6618                                           slice_index,
6619                                           batch);
6620 }
6621
/*
 * Program MFX_AVC_SLICE_STATE for one slice.
 *
 * Derives per-slice parameters (reference list sizes, weighted prediction,
 * slice QP, slice start / end MB positions) from the VA slice and picture
 * parameters, plus the BRC-related controls (rate-control counters, RC
 * panic, rounding) from the encoder state, and packs them into the 11-DW
 * command.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* MB address -> (x, y) position of the slice's first MB */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: ends at the bottom of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        /* slice-level override of the active reference count */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* rate-control counters only apply on BRC re-encode (PAK) passes */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* RC panic only on the final PAK pass of a BRC encode without
     * explicit min/max QP constraints */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    /* DW3: prediction / deblocking / CABAC controls and slice QP */
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: slice start MB and next-slice start MB positions */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    /* DW6: stream-out / rate-control flags */
    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of this slice in the PAK-BSE indirect object */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    /* DW8: QP clamp and grow/shrink controls (unused: all zero here) */
    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    /* DW9: rounding controls and RC correction factors */
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
6770
6771 static uint8_t
6772 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6773 {
6774     unsigned int is_long_term =
6775         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6776     unsigned int is_top_field =
6777         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6778     unsigned int is_bottom_field =
6779         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6780
6781     return ((is_long_term                         << 6) |
6782             (0 << 5) |
6783             (frame_store_id                       << 1) |
6784             ((is_top_field ^ 1) & is_bottom_field));
6785 }
6786
6787 static void
6788 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6789                            struct encode_state *encode_state,
6790                            struct intel_encoder_context *encoder_context,
6791                            VAEncSliceParameterBufferH264 *slice_param,
6792                            struct intel_batchbuffer *batch)
6793 {
6794     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6795     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6796     VAPictureH264 *ref_pic;
6797     int i, slice_type, ref_idx_shift;
6798     unsigned int fwd_ref_entry;
6799     unsigned int bwd_ref_entry;
6800
6801     /* max 4 ref frames are allowed for l0 and l1 */
6802     fwd_ref_entry = 0x80808080;
6803     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6804
6805     if ((slice_type == SLICE_TYPE_P) ||
6806         (slice_type == SLICE_TYPE_B)) {
6807         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6808             ref_pic = &slice_param->RefPicList0[i];
6809             ref_idx_shift = i * 8;
6810
6811             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6812             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6813         }
6814     }
6815
6816     bwd_ref_entry = 0x80808080;
6817     if (slice_type == SLICE_TYPE_B) {
6818         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6819             ref_pic = &slice_param->RefPicList1[i];
6820             ref_idx_shift = i * 8;
6821
6822             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6823             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
6824         }
6825     }
6826
6827     if ((slice_type == SLICE_TYPE_P) ||
6828         (slice_type == SLICE_TYPE_B)) {
6829         BEGIN_BCS_BATCH(batch, 10);
6830         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6831         OUT_BCS_BATCH(batch, 0);                        // L0
6832         OUT_BCS_BATCH(batch, fwd_ref_entry);
6833
6834         for (i = 0; i < 7; i++) {
6835             OUT_BCS_BATCH(batch, 0x80808080);
6836         }
6837
6838         ADVANCE_BCS_BATCH(batch);
6839     }
6840
6841     if (slice_type == SLICE_TYPE_B) {
6842         BEGIN_BCS_BATCH(batch, 10);
6843         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6844         OUT_BCS_BATCH(batch, 1);                  //Select L1
6845         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
6846         for (i = 0; i < 7; i++) {
6847             OUT_BCS_BATCH(batch, 0x80808080);
6848         }
6849         ADVANCE_BCS_BATCH(batch);
6850     }
6851 }
6852
6853 static void
6854 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6855                                 struct encode_state *encode_state,
6856                                 struct intel_encoder_context *encoder_context,
6857                                 VAEncPictureParameterBufferH264 *pic_param,
6858                                 VAEncSliceParameterBufferH264 *slice_param,
6859                                 struct intel_batchbuffer *batch)
6860 {
6861     int i, slice_type;
6862     short weightoffsets[32 * 6];
6863
6864     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6865
6866     if (slice_type == SLICE_TYPE_P &&
6867         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6868         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6869         for (i = 0; i < 32; i++) {
6870             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6871             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6872             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6873             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6874             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6875             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6876         }
6877
6878         BEGIN_BCS_BATCH(batch, 98);
6879         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6880         OUT_BCS_BATCH(batch, 0);
6881         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6882
6883         ADVANCE_BCS_BATCH(batch);
6884     }
6885
6886     if (slice_type == SLICE_TYPE_B &&
6887         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6888         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6889         for (i = 0; i < 32; i++) {
6890             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6891             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6892             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6893             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6894             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6895             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6896         }
6897
6898         BEGIN_BCS_BATCH(batch, 98);
6899         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6900         OUT_BCS_BATCH(batch, 0);
6901         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6902         ADVANCE_BCS_BATCH(batch);
6903
6904         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6905         for (i = 0; i < 32; i++) {
6906             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6907             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6908             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6909             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6910             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6911             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6912         }
6913
6914         BEGIN_BCS_BATCH(batch, 98);
6915         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6916         OUT_BCS_BATCH(batch, 1);
6917         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6918         ADVANCE_BCS_BATCH(batch);
6919     }
6920 }
6921
/*
 * Emit the PAK commands for one slice.
 *
 * On the first PAK pass the per-slice command sequence (ref idx state,
 * weight/offset state, slice state, packed headers) is recorded into a
 * second-level batch buffer and its start offset is remembered in
 * avc_state->slice_batch_offset[].  Later BRC re-encode passes replay the
 * recorded sequence from that stored offset instead of rebuilding it.
 * The MB code produced by the VME stage is then chained in as a second
 * second-level batch.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands at the current write
         * position of the shared second-level slice batch. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded sequence so replaying it returns to the
         * first-level batch. */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-encode pass: reuse the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* MB code layout is 16 DWs (64 bytes) per macroblock; start at this
     * slice's first MB. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
6996
/*
 * Walk all slice parameter buffers and emit PAK commands for every slice,
 * then flush the video pipeline.  When the frame is configured as a single
 * slice (avc_state->slice_num <= 1), only the first slice entry is
 * processed ("frame level" encoding).
 */
static void
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
    int has_tail = 0;             /* check it later */

    /* Outer loop: slice parameter buffers (slice groups); inner loop:
     * slices within a buffer.  next_slice_param lets the slice-state
     * command know where the following slice starts (NULL for the last). */
    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_mfc_avc_single_slice(ctx,
                                      encode_state,
                                      encoder_context,
                                      slice_param,
                                      next_slice_param,
                                      slice_index);
            slice_param++;
            slice_index++;

            if (is_frame_level)
                break;
        }

        if (is_frame_level)
            break;
    }

    if (has_tail) {
        /* insert a tail if required */
    }

    /* Flush and invalidate the video pipeline cache after all slices. */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
/*
 * Emit the picture-level PAK command sequence: pipe mode select, surface
 * states, buffer address states, the AVC image state (from the BRC-updated
 * buffer or a freshly generated one for CQP), QM/FQM matrices and direct-mode
 * state.  On BRC re-encode passes, a conditional batch-buffer-end first
 * checks the image status mask written by the previous pass so the re-encode
 * is skipped when the frame already met its size target.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* Skip this re-encode pass if the previous pass's status says the
         * frame size is already acceptable. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* Replay the image state written by the BRC kernel for this pass;
         * the read buffer holds one image state command per PAK pass. */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
7113
/*
 * Capture MFX status registers after PAK: store the bitstream byte count
 * and image status mask into the internal status buffer, and mirror the
 * byte counts, executed-pass count and per-pass image status control into
 * the BRC pre-PAK statistics buffer consumed by the BRC update kernel.
 * Surrounded by MI_FLUSH_DW so the register reads see completed PAK work.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* DW0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW1: byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW2: number of PAK passes executed so far */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* DW4+pass: image status control register for this pass */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
7173
7174 static void
7175 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7176                          struct intel_encoder_context *encoder_context)
7177 {
7178     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7179     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7180     unsigned int rate_control_mode = encoder_context->rate_control_mode;
7181
7182     switch (rate_control_mode & 0x7f) {
7183     case VA_RC_CBR:
7184         generic_state->internal_rate_mode = VA_RC_CBR;
7185         break;
7186
7187     case VA_RC_VBR:
7188         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7189         break;
7190
7191     case VA_RC_CQP:
7192     default:
7193         generic_state->internal_rate_mode = VA_RC_CQP;
7194         break;
7195     }
7196
7197     if (encoder_context->quality_level == 0)
7198         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
7199 }
7200
/*
 * Prepare all surfaces and buffers needed by the PAK stage for this frame:
 * decide whether the deblocking filter is active, bind the reconstructed /
 * input / reference surfaces and their direct-MV buffers to GPE resources,
 * (re)allocate the second-level slice batch buffer and the row-store /
 * MB-status scratch buffers.  Returns VA_STATUS_SUCCESS or an allocation /
 * surface-check error.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    struct gen9_surface_avc *avc_priv_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;
    struct avc_surface_param surface_param;

    /* update the parameter and check slice parameter */
    /* Deblocking is enabled for the frame if any slice does not disable it
     * (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
        /* The current frame always occupies the last frame-store slot; its
         * direct-MV buffers go into the last two DMV entries. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    /* With deblocking on, PAK writes the post-deblocked picture; otherwise
     * it writes the pre-deblocking output. */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* Recreate the second-level slice batch sized for this frame's slice
     * count; old offsets are invalid after this. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* Row-store and status scratch buffers, sized by frame width/height
     * in macroblocks. */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
7388
/*
 * Run the PAK (bitstream packing) stage for one frame.
 *
 * Prepares the per-frame PAK resources, then submits a single atomic
 * batch on the BSD ring that executes num_pak_passes picture/slice-level
 * PAK passes (multiple passes let BRC converge on the target frame size).
 * The MFC status registers are read back after every pass.
 *
 * Note: 'profile' is unused here; profile validation happens in the
 * caller, gen9_avc_pak_pipeline().
 *
 * Returns VA_STATUS_SUCCESS, or the error from the prepare step.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    /* Allocates PAK scratch buffers, binds reference/DMV surfaces and
     * (re)creates the second-level slice batch buffer for this frame. */
    va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Pin the submission to BSD ring 0 on platforms with two BSD rings. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* First pass only: zero the MFC image status/control register so
             * stale status from a previous frame cannot carry over.  (Original
             * author's note: unclear whether AVC actually requires this.) */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        /* Emit picture-level then slice-level PAK commands, then read the
         * MFC status registers so the result of this pass can be evaluated. */
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        gen9_avc_read_mfc_status(ctx, encoder_context);

    }

    /* The second-level slice batch buffer is per-frame; release it now. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    /* Per-frame bookkeeping for the next submission. */
    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
7448
7449 static VAStatus
7450 gen9_avc_pak_pipeline(VADriverContextP ctx,
7451                       VAProfile profile,
7452                       struct encode_state *encode_state,
7453                       struct intel_encoder_context *encoder_context)
7454 {
7455     VAStatus vaStatus;
7456
7457     switch (profile) {
7458     case VAProfileH264ConstrainedBaseline:
7459     case VAProfileH264Main:
7460     case VAProfileH264High:
7461     case VAProfileH264MultiviewHigh:
7462     case VAProfileH264StereoHigh:
7463         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7464         break;
7465
7466     default:
7467         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
7468         break;
7469     }
7470
7471     return vaStatus;
7472 }
7473
7474 static void
7475 gen9_avc_pak_context_destroy(void * context)
7476 {
7477     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7478     struct generic_encoder_context * generic_ctx;
7479     struct i965_avc_encoder_context * avc_ctx;
7480     int i = 0;
7481
7482     if (!pak_context)
7483         return;
7484
7485     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7486     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7487
7488     // other things
7489     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7490     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7491     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7492     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7493
7494     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7495     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7496     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7497     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7498     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7499
7500     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7501         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7502     }
7503
7504     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7505         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7506     }
7507
7508     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7509         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7510         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7511     }
7512
7513 }
7514
7515 static VAStatus
7516 gen9_avc_get_coded_status(VADriverContextP ctx,
7517                           struct intel_encoder_context *encoder_context,
7518                           struct i965_coded_buffer_segment *coded_buf_seg)
7519 {
7520     struct encoder_status *avc_encode_status;
7521
7522     if (!encoder_context || !coded_buf_seg)
7523         return VA_STATUS_ERROR_INVALID_BUFFER;
7524
7525     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7526     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7527
7528     return VA_STATUS_SUCCESS;
7529 }
7530
7531 Bool
7532 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7533 {
7534     /* VME & PAK share the same context */
7535     struct i965_driver_data *i965 = i965_driver_data(ctx);
7536     struct encoder_vme_mfc_context * vme_context = NULL;
7537     struct generic_encoder_context * generic_ctx = NULL;
7538     struct i965_avc_encoder_context * avc_ctx = NULL;
7539     struct generic_enc_codec_state * generic_state = NULL;
7540     struct avc_enc_state * avc_state = NULL;
7541     struct encoder_status_buffer_internal *status_buffer;
7542     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7543
7544     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7545     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7546     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7547     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7548     avc_state = calloc(1, sizeof(struct avc_enc_state));
7549
7550     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7551         goto allocate_structure_failed;
7552
7553     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7554     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7555     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7556     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7557     memset(avc_state, 0, sizeof(struct avc_enc_state));
7558
7559     encoder_context->vme_context = vme_context;
7560     vme_context->generic_enc_ctx = generic_ctx;
7561     vme_context->private_enc_ctx = avc_ctx;
7562     vme_context->generic_enc_state = generic_state;
7563     vme_context->private_enc_state = avc_state;
7564
7565     if (IS_SKL(i965->intel.device_info) ||
7566         IS_BXT(i965->intel.device_info)) {
7567         generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7568         generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7569     } else if (IS_KBL(i965->intel.device_info) ||
7570                IS_GLK(i965->intel.device_info)) {
7571         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7572         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7573     } else
7574         goto allocate_structure_failed;
7575
7576     /* initialize misc ? */
7577     avc_ctx->ctx = ctx;
7578     generic_ctx->use_hw_scoreboard = 1;
7579     generic_ctx->use_hw_non_stalling_scoreboard = 1;
7580
7581     /* initialize generic state */
7582
7583     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7584     generic_state->preset = INTEL_PRESET_RT_SPEED;
7585     generic_state->seq_frame_number = 0;
7586     generic_state->total_frame_number = 0;
7587     generic_state->frame_type = 0;
7588     generic_state->first_frame = 1;
7589
7590     generic_state->frame_width_in_pixel = 0;
7591     generic_state->frame_height_in_pixel = 0;
7592     generic_state->frame_width_in_mbs = 0;
7593     generic_state->frame_height_in_mbs = 0;
7594     generic_state->frame_width_4x = 0;
7595     generic_state->frame_height_4x = 0;
7596     generic_state->frame_width_16x = 0;
7597     generic_state->frame_height_16x = 0;
7598     generic_state->frame_width_32x = 0;
7599     generic_state->downscaled_width_4x_in_mb = 0;
7600     generic_state->downscaled_height_4x_in_mb = 0;
7601     generic_state->downscaled_width_16x_in_mb = 0;
7602     generic_state->downscaled_height_16x_in_mb = 0;
7603     generic_state->downscaled_width_32x_in_mb = 0;
7604     generic_state->downscaled_height_32x_in_mb = 0;
7605
7606     generic_state->hme_supported = 1;
7607     generic_state->b16xme_supported = 1;
7608     generic_state->b32xme_supported = 0;
7609     generic_state->hme_enabled = 0;
7610     generic_state->b16xme_enabled = 0;
7611     generic_state->b32xme_enabled = 0;
7612     generic_state->brc_distortion_buffer_supported = 1;
7613     generic_state->brc_constant_buffer_supported = 0;
7614
7615
7616     generic_state->frame_rate = 30;
7617     generic_state->brc_allocated = 0;
7618     generic_state->brc_inited = 0;
7619     generic_state->brc_need_reset = 0;
7620     generic_state->is_low_delay = 0;
7621     generic_state->brc_enabled = 0;//default
7622     generic_state->internal_rate_mode = 0;
7623     generic_state->curr_pak_pass = 0;
7624     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7625     generic_state->is_first_pass = 1;
7626     generic_state->is_last_pass = 0;
7627     generic_state->mb_brc_enabled = 0; // enable mb brc
7628     generic_state->brc_roi_enable = 0;
7629     generic_state->brc_dirty_roi_enable = 0;
7630     generic_state->skip_frame_enbale = 0;
7631
7632     generic_state->target_bit_rate = 0;
7633     generic_state->max_bit_rate = 0;
7634     generic_state->min_bit_rate = 0;
7635     generic_state->init_vbv_buffer_fullness_in_bit = 0;
7636     generic_state->vbv_buffer_size_in_bit = 0;
7637     generic_state->frames_per_100s = 0;
7638     generic_state->gop_size = 0;
7639     generic_state->gop_ref_distance = 0;
7640     generic_state->brc_target_size = 0;
7641     generic_state->brc_mode = 0;
7642     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7643     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7644     generic_state->brc_init_reset_buf_size_in_bits = 0;
7645     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7646     generic_state->frames_per_window_size = 0;//default
7647     generic_state->target_percentage = 0;
7648
7649     generic_state->avbr_curracy = 0;
7650     generic_state->avbr_convergence = 0;
7651
7652     generic_state->num_skip_frames = 0;
7653     generic_state->size_skip_frames = 0;
7654
7655     generic_state->num_roi = 0;
7656     generic_state->max_delta_qp = 0;
7657     generic_state->min_delta_qp = 0;
7658
7659     if (encoder_context->rate_control_mode != VA_RC_NONE &&
7660         encoder_context->rate_control_mode != VA_RC_CQP) {
7661         generic_state->brc_enabled = 1;
7662         generic_state->brc_distortion_buffer_supported = 1;
7663         generic_state->brc_constant_buffer_supported = 1;
7664         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7665     }
7666     /*avc state initialization */
7667     avc_state->mad_enable = 0;
7668     avc_state->mb_disable_skip_map_enable = 0;
7669     avc_state->sfd_enable = 1;//default
7670     avc_state->sfd_mb_enable = 1;//set it true
7671     avc_state->adaptive_search_window_enable = 1;//default
7672     avc_state->mb_qp_data_enable = 0;
7673     avc_state->intra_refresh_i_enable = 0;
7674     avc_state->min_max_qp_enable = 0;
7675     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
7676
7677     //external input
7678     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7679     avc_state->ftq_skip_threshold_lut_input_enable = 0;
7680     avc_state->ftq_override = 0;
7681
7682     avc_state->direct_bias_adjustment_enable = 0;
7683     avc_state->global_motion_bias_adjustment_enable = 0;
7684     avc_state->disable_sub_mb_partion = 0;
7685     avc_state->arbitrary_num_mbs_in_slice = 0;
7686     avc_state->adaptive_transform_decision_enable = 0;//default
7687     avc_state->skip_check_disable = 0;
7688     avc_state->tq_enable = 0;
7689     avc_state->enable_avc_ildb = 0;
7690     avc_state->mbaff_flag = 0;
7691     avc_state->enable_force_skip = 1;//default
7692     avc_state->rc_panic_enable = 1;//default
7693     avc_state->suppress_recon_enable = 1;//default
7694
7695     avc_state->ref_pic_select_list_supported = 1;
7696     avc_state->mb_brc_supported = 1;//?,default
7697     avc_state->multi_pre_enable = 1;//default
7698     avc_state->ftq_enable = 1;//default
7699     avc_state->caf_supported = 1; //default
7700     avc_state->caf_enable = 0;
7701     avc_state->caf_disable_hd = 1;//default
7702     avc_state->skip_bias_adjustment_supported = 1;//default
7703
7704     avc_state->adaptive_intra_scaling_enable = 1;//default
7705     avc_state->old_mode_cost_enable = 0;//default
7706     avc_state->multi_ref_qp_enable = 1;//default
7707     avc_state->weighted_ref_l0_enable = 1;//default
7708     avc_state->weighted_ref_l1_enable = 1;//default
7709     avc_state->weighted_prediction_supported = 0;
7710     avc_state->brc_split_enable = 0;
7711     avc_state->slice_level_report_supported = 0;
7712
7713     avc_state->fbr_bypass_enable = 1;//default
7714     avc_state->field_scaling_output_interleaved = 0;
7715     avc_state->mb_variance_output_enable = 0;
7716     avc_state->mb_pixel_average_output_enable = 0;
7717     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7718     avc_state->mbenc_curbe_set_in_brc_update = 0;
7719     avc_state->rounding_inter_enable = 1; //default
7720     avc_state->adaptive_rounding_inter_enable = 1;//default
7721
7722     avc_state->mbenc_i_frame_dist_in_use = 0;
7723     avc_state->mb_status_supported = 1; //set in intialization for gen9
7724     avc_state->mb_status_enable = 0;
7725     avc_state->mb_vproc_stats_enable = 0;
7726     avc_state->flatness_check_enable = 0;
7727     avc_state->flatness_check_supported = 1;//default
7728     avc_state->block_based_skip_enable = 0;
7729     avc_state->use_widi_mbenc_kernel = 0;
7730     avc_state->kernel_trellis_enable = 0;
7731     avc_state->generic_reserved = 0;
7732
7733     avc_state->rounding_value = 0;
7734     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7735     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7736     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7737     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7738     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7739     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7740     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7741     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7742     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7743
7744     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7745     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7746     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7747
7748     avc_state->intra_refresh_qp_threshold = 0;
7749     avc_state->trellis_flag = 0;
7750     avc_state->hme_mv_cost_scaling_factor = 0;
7751     avc_state->slice_height = 1;
7752     avc_state->slice_num = 1;
7753     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7754     avc_state->bi_weight = 0;
7755
7756     avc_state->lambda_table_enable = 0;
7757
7758
7759     if (IS_SKL(i965->intel.device_info) ||
7760         IS_BXT(i965->intel.device_info)) {
7761         avc_state->brc_const_data_surface_width = 64;
7762         avc_state->brc_const_data_surface_height = 44;
7763         avc_state->brc_split_enable = 1;
7764     } else if (IS_KBL(i965->intel.device_info) ||
7765                IS_GLK(i965->intel.device_info)) {
7766         avc_state->brc_const_data_surface_width = 64;
7767         avc_state->brc_const_data_surface_height = 53;
7768         //gen95
7769         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7770         avc_state->extended_mv_cost_range_enable = 0;
7771         avc_state->reserved_g95 = 0;
7772         avc_state->mbenc_brc_buffer_size = 128;
7773         avc_state->kernel_trellis_enable = 1;
7774         avc_state->lambda_table_enable = 1;
7775         avc_state->brc_split_enable = 1;
7776     }
7777
7778     avc_state->num_refs[0] = 0;
7779     avc_state->num_refs[1] = 0;
7780     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7781     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7782     avc_state->tq_rounding = 0;
7783     avc_state->zero_mv_threshold = 0;
7784     avc_state->slice_second_levle_batch_buffer_in_use = 0;
7785
7786     //1. seq/pic/slice
7787
7788     /* the definition of status buffer offset for Encoder */
7789
7790     status_buffer = &avc_ctx->status_buffer;
7791     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
7792
7793     status_buffer->base_offset = base_offset;
7794     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7795     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7796     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7797     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7798     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7799     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
7800
7801     status_buffer->status_buffer_size = sizeof(struct encoder_status);
7802     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7803     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7804     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7805     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7806     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
7807
7808     gen9_avc_kernel_init(ctx, encoder_context);
7809     encoder_context->vme_context = vme_context;
7810     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7811     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
7812
7813     return true;
7814
7815 allocate_structure_failed:
7816
7817     free(vme_context);
7818     free(generic_ctx);
7819     free(avc_ctx);
7820     free(generic_state);
7821     free(avc_state);
7822     return false;
7823 }
7824
7825 Bool
7826 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7827 {
7828     /* VME & PAK share the same context */
7829     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7830
7831     if (!pak_context)
7832         return false;
7833
7834     encoder_context->mfc_context = pak_context;
7835     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7836     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7837     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7838     encoder_context->get_status = gen9_avc_get_coded_status;
7839     return true;
7840 }