OSDN Git Service

d0bd697414ccc32819fe033414a309b71dbdb6d8
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
1 /*
2  * Copyright @ 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
46
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
53
54 #define MAX_URB_SIZE                    4096 /* In register */
55 #define NUM_KERNELS_PER_GPE_CONTEXT     1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4  /* log2 of the alignment: 16 = (1 << 4) */
58
/*
 * Emit a two-DWORD (64-bit) buffer address into the BCS batch.
 * A non-NULL bo goes out through OUT_BCS_RELOC64 with an
 * INSTRUCTION read domain; is_target additionally marks the bo as a
 * RENDER write target, and delta is the byte offset into the bo.
 * A NULL bo emits two zero DWORDs (no buffer bound).
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                        \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/*
 * Same as OUT_BUFFER_2DW, plus a third DWORD carrying the buffer
 * attributes (e.g. memory object control state) after the address.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
76
/* Flat quantization matrix: every byte is 16 (0x10), four 8-bit
 * coefficients packed per DWORD. NOTE(review): presumably the default
 * "flat_16" AVC scaling list -- consumers are outside this chunk. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
83
/* Flat forward-quantization matrix: every 16-bit entry is 0x1000
 * (4096), two entries packed per DWORD. NOTE(review): presumably the
 * forward counterpart of qm_flat -- consumers are outside this chunk. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
94
/* Remaps a slice type to a kernel slice-type index. NOTE(review):
 * looks like H.264 slice_type (0=P, 1=B, 2=I) -> kernel ordering
 * (I=0, P=1, B=2) -- confirm against the kernel enum before relying
 * on this; the consumers are outside this chunk. */
static const unsigned int slice_type_kernel[3] = {1, 2, 0};
96
/*
 * Default CURBE payload for the BRC init/reset kernel, one initializer
 * per CURBE DWORD (DW0..DW23). The per-DWORD field layout lives in the
 * gen9_avc_brc_init_reset_curbe_data declaration (not in this file);
 * most fields default to 0 and are filled in at runtime. The few
 * non-zero defaults below (DW11..DW15) are kernel tuning constants --
 * NOTE(review): their field names/meanings are only visible in the
 * struct declaration, so they are left unannotated here.
 */
static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        0
    },

    // unsigned int 4
    {
        0
    },

    // unsigned int 5
    {
        0
    },

    // unsigned int 6
    {
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        0,
        0
    },

    // unsigned int 9
    {
        0,
        0
    },

    // unsigned int 10
    {
        0,
        0
    },

    // unsigned int 11
    {
        0,
        1
    },

    // unsigned int 12
    {
        51,
        0
    },

    // unsigned int 13
    {
        40,
        60,
        80,
        120
    },

    // unsigned int 14
    {
        35,
        60,
        80,
        120
    },

    // unsigned int 15
    {
        40,
        60,
        90,
        115
    },

    // unsigned int 16
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 17
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 18
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 19
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 20
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 21
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 22
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 23
    {
        0
    }
};
253
/*
 * Default CURBE payload for the per-frame BRC update kernel, one
 * initializer per CURBE DWORD (DW0..DW23). Field layout is declared in
 * gen9_avc_frame_brc_update_curbe_data (not in this file); runtime
 * code overwrites the frame-specific fields. The non-zero values
 * (DW3..DW14) are kernel tuning constants -- NOTE(review): their field
 * names/meanings are only visible in the struct declaration, so they
 * are left unannotated here. DW13 holds small signed adjustments
 * (-3..0) stored in unsigned fields.
 */
static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        10,
        50
    },

    // unsigned int 4
    {
        100,
        150
    },

    // unsigned int 5
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 6
    {
        0,
        0,
        0,
        0,
        0,
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        1,
        1,
        3,
        2
    },

    // unsigned int 9
    {
        1,
        40,
        5,
        5
    },

    // unsigned int 10
    {
        3,
        1,
        7,
        18
    },

    // unsigned int 11
    {
        25,
        37,
        40,
        75
    },

    // unsigned int 12
    {
        97,
        103,
        125,
        160
    },

    // unsigned int 13
    {
        -3,
        -2,
        -1,
        0
    },

    // unsigned int 14
    {
        1,
        2,
        3,
        0xff
    },

    // unsigned int 15
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 16
    {
        0
    },

    // unsigned int 17
    {
        0
    },

    // unsigned int 18
    {
        0
    },

    // unsigned int 19
    {
        0
    },

    // unsigned int 20
    {
        0
    },

    // unsigned int 21
    {
        0
    },

    // unsigned int 22
    {
        0
    },

    // unsigned int 23
    {
        0
    },

};
410
411 static void
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413                                 struct encode_state *encode_state,
414                                 struct intel_encoder_context *encoder_context)
415 {
416     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
418     int i;
419
420     /* brc */
421     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
422
423     generic_state->brc_need_reset = encoder_context->brc.need_reset;
424
425     if (generic_state->internal_rate_mode == VA_RC_CBR) {
426         generic_state->min_bit_rate = generic_state->max_bit_rate;
427         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
428
429         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430             generic_state->target_bit_rate = generic_state->max_bit_rate;
431             generic_state->brc_need_reset = 1;
432         }
433     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
434         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
436
437         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439             generic_state->brc_need_reset = 1;
440         }
441     }
442
443     /*  frame rate */
444     if (generic_state->internal_rate_mode != VA_RC_CQP) {
445         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
448     } else {
449         generic_state->frames_per_100s = 30 * 100;
450         generic_state->frame_rate = 30 ;
451         generic_state->frames_per_window_size = 30;
452     }
453
454     /*  HRD */
455     if (generic_state->internal_rate_mode != VA_RC_CQP) {
456         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
458     }
459
460     /* ROI */
461     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462     if (generic_state->num_roi > 0) {
463         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
465
466         for (i = 0; i < generic_state->num_roi; i++) {
467             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
468             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
469             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
470             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
472
473             generic_state->roi[i].left /= 16;
474             generic_state->roi[i].right /= 16;
475             generic_state->roi[i].top /= 16;
476             generic_state->roi[i].bottom /= 16;
477         }
478     }
479
480 }
481
482 static bool
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
484                                      int binary_size,
485                                      INTEL_GENERIC_ENC_OPERATION operation,
486                                      int krnstate_idx,
487                                      struct i965_kernel *ret_kernel)
488 {
489     typedef uint32_t BIN_PTR[4];
490
491     char *bin_start;
492     gen9_avc_encoder_kernel_header      *pkh_table;
493     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
494     int next_krnoffset;
495
496     if (!pvbinary || !ret_kernel)
497         return false;
498
499     bin_start = (char *)pvbinary;
500     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
501     pinvalid_entry = &(pkh_table->static_detection) + 1;
502     next_krnoffset = binary_size;
503
504     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505         pcurr_header = &pkh_table->ply_dscale_ply;
506     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507         pcurr_header = &pkh_table->ply_2xdscale_ply;
508     } else if (operation == INTEL_GENERIC_ENC_ME) {
509         pcurr_header = &pkh_table->me_p;
510     } else if (operation == INTEL_GENERIC_ENC_BRC) {
511         pcurr_header = &pkh_table->frame_brc_init;
512     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513         pcurr_header = &pkh_table->mbenc_quality_I;
514     } else if (operation == INTEL_GENERIC_ENC_WP) {
515         pcurr_header = &pkh_table->wp;
516     } else if (operation == INTEL_GENERIC_ENC_SFD) {
517         pcurr_header = &pkh_table->static_detection;
518     } else {
519         return false;
520     }
521
522     pcurr_header += krnstate_idx;
523     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
524
525     pnext_header = (pcurr_header + 1);
526     if (pnext_header < pinvalid_entry) {
527         next_krnoffset = pnext_header->kernel_start_pointer << 6;
528     }
529     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
530
531     return true;
532 }
533 static void
534 gen9_free_surfaces_avc(void **data)
535 {
536     struct gen9_surface_avc *avc_surface;
537
538     if (!data || !*data)
539         return;
540
541     avc_surface = *data;
542
543     if (avc_surface->scaled_4x_surface_obj) {
544         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546         avc_surface->scaled_4x_surface_obj = NULL;
547     }
548
549     if (avc_surface->scaled_16x_surface_obj) {
550         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552         avc_surface->scaled_16x_surface_obj = NULL;
553     }
554
555     if (avc_surface->scaled_32x_surface_obj) {
556         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558         avc_surface->scaled_32x_surface_obj = NULL;
559     }
560
561     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
564
565     dri_bo_unreference(avc_surface->dmv_top);
566     avc_surface->dmv_top = NULL;
567     dri_bo_unreference(avc_surface->dmv_bottom);
568     avc_surface->dmv_bottom = NULL;
569
570     free(avc_surface);
571
572     *data = NULL;
573
574     return;
575 }
576
/*
 * Lazily allocate the per-surface encoder data for obj_surface:
 * 4x/16x (and optionally 32x) downscaled NV12 surfaces, the MB-code
 * and MV-data GPE buffers, the optional ref-pic-select surface and the
 * direct-MV top/bottom bos. Idempotent: returns immediately if the
 * surface already carries private data.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_SURFACE for a
 * NULL/bo-less surface, or VA_STATUS_ERROR_ALLOCATION_FAILED.
 */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* already initialized for this surface */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Attach the private data and its destructor before any allocation
     * below, so partially-allocated resources are still released via
     * gen9_free_surfaces_avc when the surface is destroyed. */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    /* 4x downscaled surface for HME/scaling */
    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    /* a failed i965_CreateSurfaces is detected via the NULL lookup */
    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 16x downscaled surface */
    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 32x downscaled surface, only when 32x ME is available */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /*mb code and mv data for each frame*/
    size = frame_mb_nums * 16 * 4;  /* 16 DWORDs of MB code per macroblock */
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mb_code_surface,
                                               ALIGN(size, 0x1000),
                                               "mb code buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = frame_mb_nums * 32 * 4;  /* 32 DWORDs of MV data per macroblock */
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mv_data_surface,
                                               ALIGN(size, 0x1000),
                                               "mv data buffer");
    if (!allocate_flag)
        goto failed_allocation;

    /* ref pic list*/
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /*direct mv*/
    /* 68 bytes of direct-MV data per MB for each field polarity */
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    /* avc_surface stays attached to obj_surface; its destructor frees
     * whatever was successfully allocated above */
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
720
/*
 * Fill the MBEnc slice-map surface: one DWORD per macroblock holding
 * the index of the slice that MB belongs to, laid out row by row with
 * the surface pitch. Only needed when slices contain an arbitrary
 * number of macroblocks (not a whole multiple of MB rows).
 */
static void
gen9_avc_generate_slice_map(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    VAEncSliceParameterBufferH264 * slice_param = NULL;
    unsigned int * data = NULL;
    unsigned int * data_row = NULL;
    int i, j, count = 0;
    /* surface pitch in DWORDs: one entry per MB plus one spare column,
     * rows aligned to 64 bytes */
    unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;

    if (!avc_state->arbitrary_num_mbs_in_slice)
        return;

    gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
    assert(data_row);

    data = data_row;
    /* walk all slices; `count` tracks the global MB index so we can
     * jump to the next surface row at each frame-width boundary */
    for (i = 0; i < avc_state->slice_num; i++) {
        slice_param = avc_state->slice_param[i];
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            *data++ = i;
            if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
                /* crossed into a new MB row: advance to the next pitched
                 * row and re-store the current slice index there */
                data_row += pitch;
                data = data_row;
                *data++ = i;
            }
            count++;
        }
    }
    /* terminator after the last MB -- NOTE(review): presumably a
     * sentinel consumed by the MBEnc kernel; confirm against kernel */
    *data++ = 0xFFFFFFFF;

    i965_unmap_gpe_resource(gpe_resource);
}
766
767 static VAStatus
768 gen9_avc_allocate_resources(VADriverContextP ctx,
769                             struct encode_state *encode_state,
770                             struct intel_encoder_context *encoder_context)
771 {
772     struct i965_driver_data *i965 = i965_driver_data(ctx);
773     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
774     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
775     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
777     unsigned int size  = 0;
778     unsigned int width  = 0;
779     unsigned int height  = 0;
780     unsigned char * data  = NULL;
781     int allocate_flag = 1;
782     int i = 0;
783
784     /*all the surface/buffer are allocated here*/
785
786     /*second level batch buffer for image state write when cqp etc*/
787     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
788     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
789     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
790                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
791                                                ALIGN(size, 0x1000),
792                                                "second levle batch (image state write) buffer");
793     if (!allocate_flag)
794         goto failed_allocation;
795
796     /* scaling related surface   */
797     if (avc_state->mb_status_supported) {
798         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
799         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
800         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
801                                                    &avc_ctx->res_mb_status_buffer,
802                                                    ALIGN(size, 0x1000),
803                                                    "MB statistics output buffer");
804         if (!allocate_flag)
805             goto failed_allocation;
806         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
807     }
808
809     if (avc_state->flatness_check_supported) {
810         width = generic_state->frame_width_in_mbs * 4;
811         height = generic_state->frame_height_in_mbs * 4;
812         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
813         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
814                                                       &avc_ctx->res_flatness_check_surface,
815                                                       width, height,
816                                                       ALIGN(width, 64),
817                                                       "Flatness check buffer");
818         if (!allocate_flag)
819             goto failed_allocation;
820     }
821     /* me related surface */
822     width = generic_state->downscaled_width_4x_in_mb * 8;
823     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
824     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
825     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
826                                                   &avc_ctx->s4x_memv_distortion_buffer,
827                                                   width, height,
828                                                   ALIGN(width, 64),
829                                                   "4x MEMV distortion buffer");
830     if (!allocate_flag)
831         goto failed_allocation;
832     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
833
834     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
835     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
836     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
837     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
838                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
839                                                   width, height,
840                                                   width,
841                                                   "4x MEMV min distortion brc buffer");
842     if (!allocate_flag)
843         goto failed_allocation;
844     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
845
846
847     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
848     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
849     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
850     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
851                                                   &avc_ctx->s4x_memv_data_buffer,
852                                                   width, height,
853                                                   width,
854                                                   "4x MEMV data buffer");
855     if (!allocate_flag)
856         goto failed_allocation;
857     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
858
859
860     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
861     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
862     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
863     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
864                                                   &avc_ctx->s16x_memv_data_buffer,
865                                                   width, height,
866                                                   width,
867                                                   "16x MEMV data buffer");
868     if (!allocate_flag)
869         goto failed_allocation;
870     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
871
872
873     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
874     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
875     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
876     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
877                                                   &avc_ctx->s32x_memv_data_buffer,
878                                                   width, height,
879                                                   width,
880                                                   "32x MEMV data buffer");
881     if (!allocate_flag)
882         goto failed_allocation;
883     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
884
885
886     if (!generic_state->brc_allocated) {
887         /*brc related surface */
888         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
889         size = 864;
890         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891                                                    &avc_ctx->res_brc_history_buffer,
892                                                    ALIGN(size, 0x1000),
893                                                    "brc history buffer");
894         if (!allocate_flag)
895             goto failed_allocation;
896
897         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
898         size = 64;//44
899         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
900                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
901                                                    ALIGN(size, 0x1000),
902                                                    "brc pak statistic buffer");
903         if (!allocate_flag)
904             goto failed_allocation;
905
906         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
907         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
908         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
909                                                    &avc_ctx->res_brc_image_state_read_buffer,
910                                                    ALIGN(size, 0x1000),
911                                                    "brc image state read buffer");
912         if (!allocate_flag)
913             goto failed_allocation;
914
915         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
916         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
917         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
918                                                    &avc_ctx->res_brc_image_state_write_buffer,
919                                                    ALIGN(size, 0x1000),
920                                                    "brc image state write buffer");
921         if (!allocate_flag)
922             goto failed_allocation;
923
924         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
925         height = avc_state->brc_const_data_surface_height;
926         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
927         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
928                                                       &avc_ctx->res_brc_const_data_buffer,
929                                                       width, height,
930                                                       width,
931                                                       "brc const data buffer");
932         if (!allocate_flag)
933             goto failed_allocation;
934
935         if (generic_state->brc_distortion_buffer_supported) {
936             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
937             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
938             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
939             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
940             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
941             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
942                                                           &avc_ctx->res_brc_dist_data_surface,
943                                                           width, height,
944                                                           width,
945                                                           "brc dist data buffer");
946             if (!allocate_flag)
947                 goto failed_allocation;
948             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
949         }
950
951         if (generic_state->brc_roi_enable) {
952             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
953             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
954             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
955             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
956                                                           &avc_ctx->res_mbbrc_roi_surface,
957                                                           width, height,
958                                                           width,
959                                                           "mbbrc roi buffer");
960             if (!allocate_flag)
961                 goto failed_allocation;
962             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
963         }
964
965         /*mb qp in mb brc*/
966         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
967         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
968         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
969         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
970                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
971                                                       width, height,
972                                                       width,
973                                                       "mbbrc mb qp buffer");
974         if (!allocate_flag)
975             goto failed_allocation;
976
977         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
978         size = 16 * AVC_QP_MAX * 4;
979         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
980                                                    &avc_ctx->res_mbbrc_const_data_buffer,
981                                                    ALIGN(size, 0x1000),
982                                                    "mbbrc const data buffer");
983         if (!allocate_flag)
984             goto failed_allocation;
985
986         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
987             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
988             size = avc_state->mbenc_brc_buffer_size;
989             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
990                                                        &avc_ctx->res_mbenc_brc_buffer,
991                                                        ALIGN(size, 0x1000),
992                                                        "mbenc brc buffer");
993             if (!allocate_flag)
994                 goto failed_allocation;
995             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
996         }
997         generic_state->brc_allocated = 1;
998     }
999
1000     /*mb qp external*/
1001     if (avc_state->mb_qp_data_enable) {
1002         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1003         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1004         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1005         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1006                                                       &avc_ctx->res_mb_qp_data_surface,
1007                                                       width, height,
1008                                                       width,
1009                                                       "external mb qp buffer");
1010         if (!allocate_flag)
1011             goto failed_allocation;
1012     }
1013
1014     /*     mbenc related surface. it share most of surface with other kernels     */
1015     if (avc_state->arbitrary_num_mbs_in_slice) {
1016         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1017         height = generic_state->frame_height_in_mbs ;
1018         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1019         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1020                                                       &avc_ctx->res_mbenc_slice_map_surface,
1021                                                       width, height,
1022                                                       width,
1023                                                       "slice map buffer");
1024         if (!allocate_flag)
1025             goto failed_allocation;
1026         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1027
1028         /*generate slice map,default one slice per frame.*/
1029     }
1030
1031     /* sfd related surface  */
1032     if (avc_state->sfd_enable) {
1033         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1034         size = 128;
1035         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1036                                                    &avc_ctx->res_sfd_output_buffer,
1037                                                    size,
1038                                                    "sfd output buffer");
1039         if (!allocate_flag)
1040             goto failed_allocation;
1041
1042         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1043         size = ALIGN(52, 64);
1044         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1045                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1046                                                    size,
1047                                                    "sfd P frame cost table buffer");
1048         if (!allocate_flag)
1049             goto failed_allocation;
1050         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1051         assert(data);
1052         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1053         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1054
1055         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1056         size = ALIGN(52, 64);
1057         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1058                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1059                                                    size,
1060                                                    "sfd B frame cost table buffer");
1061         if (!allocate_flag)
1062             goto failed_allocation;
1063         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1064         assert(data);
1065         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1066         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1067     }
1068
1069     /* wp related surfaces */
1070     if (avc_state->weighted_prediction_supported) {
1071         for (i = 0; i < 2 ; i++) {
1072             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1073                 continue;
1074             }
1075
1076             width = generic_state->frame_width_in_pixel;
1077             height = generic_state->frame_height_in_pixel ;
1078             i965_CreateSurfaces(ctx,
1079                                 width,
1080                                 height,
1081                                 VA_RT_FORMAT_YUV420,
1082                                 1,
1083                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1084             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1085
1086             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1087                 goto failed_allocation;
1088             }
1089
1090             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1091                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1092         }
1093         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1094         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1095         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1096         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1097     }
1098
1099     /* other   */
1100
1101     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1102     size = 4 * 1;
1103     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1104                                                &avc_ctx->res_mad_data_buffer,
1105                                                ALIGN(size, 0x1000),
1106                                                "MAD data buffer");
1107     if (!allocate_flag)
1108         goto failed_allocation;
1109
1110     return VA_STATUS_SUCCESS;
1111
1112 failed_allocation:
1113     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1114 }
1115
/*
 * Release every GPE buffer/surface held by the AVC encoder context and
 * destroy the weighted-prediction scratch VA surfaces.
 *
 * NOTE(review): called with resources in any allocation state; assumes
 * i965_free_gpe_resource() is safe on never-allocated entries — confirm
 * against its implementation.
 */
static void
gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
{
    if (!vme_context)
        return;

    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    VADriverContextP ctx = avc_ctx->ctx;
    int i = 0;

    /* free all the surface/buffer here*/
    i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
    i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
    i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
    /* HME downscaled MV/distortion surfaces (4x/16x/32x) */
    i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
    i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
    i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
    i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
    i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
    /* BRC-related buffers */
    i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
    i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
    i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
    i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
    i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
    i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
    i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
    i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
    i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
    i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
    i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
    i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
    /* static frame detection buffers */
    i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
    i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
    i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
    i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
    i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
    i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);

    /* The weighted-prediction output surfaces are real VA surfaces created
     * with i965_CreateSurfaces(); destroy them and reset the bookkeeping so
     * a later allocation pass recreates them. */
    for (i = 0; i < 2 ; i++) {
        if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
            i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
            avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
            avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
        }
    }

}
1163
/*
 * Dispatch one GPGPU kernel via a single MEDIA_OBJECT command.
 *
 * The media function id is first written into the encoder status buffer
 * (MI_STORE_DATA_IMM at status_buffer->media_index_offset) so later status
 * reporting can tell which kernel was submitted last.  The whole sequence
 * (pipeline setup, MEDIA_OBJECT, media state flush, pipeline end) is
 * emitted atomically into the batch and flushed to hardware on return.
 */
static void
gen9_avc_run_kernel_media_object(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 struct i965_gpe_context *gpe_context,
                                 int media_function,
                                 struct gpe_media_object_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* Record the running media function for the status report path. */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1203
/*
 * Dispatch one GPGPU kernel via a MEDIA_OBJECT_WALKER command (one thread
 * per walker unit instead of a single MEDIA_OBJECT).
 *
 * Mirrors gen9_avc_run_kernel_media_object(): the media function id is
 * stored into the status buffer first, then the walker command sequence is
 * emitted atomically and flushed to hardware.
 */
static void
gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        struct i965_gpe_context *gpe_context,
                                        int media_function,
                                        struct gpe_media_object_walker_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* Record the running media function for the status report path. */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object_walker(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1244
/*
 * Fill in the common GPE context layout shared by the AVC encoder
 * kernels: CURBE size, sampler state, interface descriptor table,
 * binding table / surface state layout, and the VFE (thread dispatch)
 * configuration.  kernel_param supplies the per-kernel CURBE, inline-data
 * and sampler sizes.
 */
static void
gen9_init_gpe_context_avc(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct encoder_kernel_parameter *kernel_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    /* Only reserve sampler space when the kernel actually uses one. */
    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;
    }

    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* Binding table entries (4 bytes each) come first, followed by the
     * padded surface states; both sub-blocks are 64-byte aligned. */
    gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* Scale the thread budget with the detected EU count when available. */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
    else
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* Give whatever URB space remains after CURBE and interface
     * descriptors to URB entries, kept within [1, 127]. */
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
}
1284
1285 static void
1286 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1287                              struct encoder_scoreboard_parameter *scoreboard_param)
1288 {
1289     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1290     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1291     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1292
1293     if (scoreboard_param->walkpat_flag) {
1294         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1295         gpe_context->vfe_desc5.scoreboard0.type = 1;
1296
1297         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1298         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1299
1300         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1301         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1302
1303         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1304         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1305
1306         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1307         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1308     } else {
1309         // Scoreboard 0
1310         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1311         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1312
1313         // Scoreboard 1
1314         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1315         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1316
1317         // Scoreboard 2
1318         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1319         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1320
1321         // Scoreboard 3
1322         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1323         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1324
1325         // Scoreboard 4
1326         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1327         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1328
1329         // Scoreboard 5
1330         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1331         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1332
1333         // Scoreboard 6
1334         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1335         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1336
1337         // Scoreboard 7
1338         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1339         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1340     }
1341 }
1342 /*
1343 VME pipeline related function
1344 */
1345
1346 /*
1347 scaling kernel related function
1348 */
1349 static void
1350 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1351                              struct encode_state *encode_state,
1352                              struct i965_gpe_context *gpe_context,
1353                              struct intel_encoder_context *encoder_context,
1354                              void *param)
1355 {
1356     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1357     struct scaling_param *surface_param = (struct scaling_param *)param;
1358
1359     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1360
1361     if (!curbe_cmd)
1362         return;
1363
1364     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1365
1366     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1367     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1368
1369     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1370     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1371
1372
1373     curbe_cmd->dw5.flatness_threshold = 128;
1374     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1375     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1376     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1377
1378     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1379         curbe_cmd->dw7.enable_mb_variance_output ||
1380         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1381         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1382     }
1383
1384     i965_gpe_context_unmap_curbe(gpe_context);
1385     return;
1386 }
1387
1388 static void
1389 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1390                               struct encode_state *encode_state,
1391                               struct i965_gpe_context *gpe_context,
1392                               struct intel_encoder_context *encoder_context,
1393                               void *param)
1394 {
1395     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1396     struct scaling_param *surface_param = (struct scaling_param *)param;
1397
1398     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1399
1400     if (!curbe_cmd)
1401         return;
1402
1403     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1404
1405     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1406     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1407
1408     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1409     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1410
1411     if (surface_param->enable_mb_flatness_check)
1412         curbe_cmd->dw5.flatness_threshold = 128;
1413     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1414     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1415     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1416     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1417
1418     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1419         curbe_cmd->dw6.enable_mb_variance_output ||
1420         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1421         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1422     }
1423
1424     i965_gpe_context_unmap_curbe(gpe_context);
1425     return;
1426 }
1427
1428 static void
1429 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1430                              struct encode_state *encode_state,
1431                              struct i965_gpe_context *gpe_context,
1432                              struct intel_encoder_context *encoder_context,
1433                              void *param)
1434 {
1435     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1436     struct scaling_param *surface_param = (struct scaling_param *)param;
1437
1438     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1439
1440     if (!curbe_cmd)
1441         return;
1442
1443     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1444
1445     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1446     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1447
1448     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1449     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1450
1451     i965_gpe_context_unmap_curbe(gpe_context);
1452     return;
1453 }
1454
1455 static void
1456 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1457                               struct encode_state *encode_state,
1458                               struct i965_gpe_context *gpe_context,
1459                               struct intel_encoder_context *encoder_context,
1460                               void *param)
1461 {
1462     struct scaling_param *surface_param = (struct scaling_param *)param;
1463     unsigned int surface_format;
1464     unsigned int res_size;
1465
1466     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1467         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1468     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1469         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1470     else
1471         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1472
1473     gen9_add_2d_gpe_surface(ctx, gpe_context,
1474                             surface_param->input_surface,
1475                             0, 1, surface_format,
1476                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1477
1478     gen9_add_2d_gpe_surface(ctx, gpe_context,
1479                             surface_param->output_surface,
1480                             0, 1, surface_format,
1481                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1482
1483     /*add buffer mv_proc_stat, here need change*/
1484     if (surface_param->mbv_proc_stat_enabled) {
1485         res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1486
1487         gen9_add_buffer_gpe_surface(ctx,
1488                                     gpe_context,
1489                                     surface_param->pres_mbv_proc_stat_buffer,
1490                                     0,
1491                                     res_size / 4,
1492                                     0,
1493                                     GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1494     } else if (surface_param->enable_mb_flatness_check) {
1495         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1496                                        surface_param->pres_flatness_check_surface,
1497                                        1,
1498                                        I965_SURFACEFORMAT_R8_UNORM,
1499                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1500     }
1501
1502     return;
1503 }
1504
/*
 * Dispatch one HME downscaling pass (4x, 16x or 32x of the source frame).
 *
 * Selects the scaling kernel, input/output surfaces and walker resolution
 * for the requested hme_type, programs the CURBE and binding table, then
 * runs the kernel through a no-dependency media object walker.
 *
 * Returns VA_STATUS_SUCCESS unconditionally; failures inside the GPE
 * helpers are not propagated.
 */
static VAStatus
gen9_avc_kernel_scaling(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int hme_type)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    struct i965_gpe_context *gpe_context;
    struct scaling_param surface_param;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;

    /* The downscaled surfaces hang off the reconstructed surface's
     * private data. */
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;

    memset(&surface_param, 0, sizeof(struct scaling_param));
    switch (hme_type) {
    case INTEL_ENC_HME_4x : {
        /* 4x: raw input -> 4x surface; the only level that can emit MB
         * flatness / variance / pixel-average statistics. */
        media_function = INTEL_MEDIA_STATE_4X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;

        surface_param.input_surface = encode_state->input_yuv_object ;
        surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
        surface_param.input_frame_height = generic_state->frame_height_in_pixel ;

        surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_4x ;
        surface_param.output_frame_height = generic_state->frame_height_4x ;

        surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
        surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
        surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 1 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 0 ;
        break;
    }
    case INTEL_ENC_HME_16x : {
        /* 16x reuses the 4x kernel, fed with the 4x-downscaled surface;
         * no MB statistics at this level. */
        media_function = INTEL_MEDIA_STATE_16X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_4x ;
        surface_param.input_frame_height = generic_state->frame_height_4x ;

        surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_16x ;
        surface_param.output_frame_height = generic_state->frame_height_16x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 1 ;
        surface_param.use_32x_scaling = 0 ;

        break;
    }
    case INTEL_ENC_HME_32x : {
        /* 32x: 16x surface -> 32x surface via the dedicated 2x kernel. */
        media_function = INTEL_MEDIA_STATE_32X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_16x ;
        surface_param.input_frame_height = generic_state->frame_height_16x ;

        surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_32x ;
        surface_param.output_frame_height = generic_state->frame_height_32x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 1 ;
        break;
    }
    default :
        /* NOTE(review): with NDEBUG this falls through with
         * downscaled_width/height_in_mb left uninitialized; callers are
         * expected to pass only the three HME levels above. */
        assert(0);

    }

    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    /* 32x HME uses the 2x-scaling CURBE layout, other levels the 4x one. */
    if (surface_param.use_32x_scaling) {
        generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    } else {
        generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    }

    /* Output surface format selector consumed by pfn_send_scaling_surface. */
    if (surface_param.use_32x_scaling) {
        surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
    } else {
        surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
    }

    /* For the 4x pass, route MB statistics either into the MB status
     * buffer (when the platform supports it) or into the flatness-check
     * surface.  NOTE(review): these flags change after the CURBE was
     * already written above — presumably only the surface setup consumes
     * them; confirm against pfn_set_curbe_scaling4x. */
    if (surface_param.use_4x_scaling) {
        if (avc_state->mb_status_supported) {
            surface_param.enable_mb_flatness_check = 0;
            surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
            surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);

        } else {
            surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
            surface_param.mbv_proc_stat_enabled = 0 ;
            surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
        }
    }

    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* setup the interface data */
    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    if (surface_param.use_32x_scaling) {
        kernel_walker_param.resolution_x = downscaled_width_in_mb ;
        kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    } else {
        /* the scaling is based on 8x8 blk level */
        kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
        kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    }
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
1669
1670 /*
1671 frame/mb brc related function
1672 */
/*
 * Initialize a MFX_AVC_IMG_STATE command structure from the current
 * sequence/picture parameters.  The filled command is later copied into
 * the image-state batch buffers, where gen9_avc_set_image_state() /
 * gen9_avc_set_image_state_non_brc() patch the per-pass fields
 * (dw4.macroblock_stat_enable and the dw5 pass flags).
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header; length field is the size in dwords minus 2. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* Frame geometry in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* Picture-level coding controls taken from the PPS. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* Progressive-frame encoding; flags mirrored from the SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* Multi-pass size-check controls; several of these are patched per
     * PAK pass by the gen9_avc_set_image_state*() callers. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) is only enabled together with CABAC. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* Per-MB bit-size limits used by the intra/inter max-bit checks. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* Frame bitrate window; max is programmed to the field's maximum. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
1763
1764 void gen9_avc_set_image_state(VADriverContextP ctx,
1765                               struct encode_state *encode_state,
1766                               struct intel_encoder_context *encoder_context,
1767                               struct i965_gpe_resource *gpe_resource)
1768 {
1769     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1770     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1771     char *pdata;
1772     int i;
1773     unsigned int * data;
1774     struct gen9_mfx_avc_img_state cmd;
1775
1776     pdata = i965_map_gpe_resource(gpe_resource);
1777
1778     if (!pdata)
1779         return;
1780
1781     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1782     for (i = 0; i < generic_state->num_pak_passes; i++) {
1783
1784         if (i == 0) {
1785             cmd.dw4.macroblock_stat_enable = 0;
1786             cmd.dw5.non_first_pass_flag = 0;
1787         } else {
1788             cmd.dw4.macroblock_stat_enable = 1;
1789             cmd.dw5.non_first_pass_flag = 1;
1790             cmd.dw5.intra_mb_ipcm_flag = 1;
1791
1792         }
1793         cmd.dw5.mb_rate_ctrl_flag = 0;
1794         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1795         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1796         *data = MI_BATCH_BUFFER_END;
1797
1798         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1799     }
1800     i965_unmap_gpe_resource(gpe_resource);
1801     return;
1802 }
1803
1804 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1805                                       struct encode_state *encode_state,
1806                                       struct intel_encoder_context *encoder_context,
1807                                       struct i965_gpe_resource *gpe_resource)
1808 {
1809     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1810     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1811     char *pdata;
1812
1813     unsigned int * data;
1814     struct gen9_mfx_avc_img_state cmd;
1815
1816     pdata = i965_map_gpe_resource(gpe_resource);
1817
1818     if (!pdata)
1819         return;
1820
1821     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1822
1823     if (generic_state->curr_pak_pass == 0) {
1824         cmd.dw4.macroblock_stat_enable = 0;
1825         cmd.dw5.non_first_pass_flag = 0;
1826
1827     } else {
1828         cmd.dw4.macroblock_stat_enable = 1;
1829         cmd.dw5.non_first_pass_flag = 0;
1830         cmd.dw5.intra_mb_ipcm_flag = 1;
1831     }
1832
1833     cmd.dw5.mb_rate_ctrl_flag = 0;
1834     memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1835     data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1836     *data = MI_BATCH_BUFFER_END;
1837
1838     i965_unmap_gpe_resource(gpe_resource);
1839     return;
1840 }
1841
1842 static void
1843 gen95_avc_calc_lambda_table(VADriverContextP ctx,
1844                             struct encode_state *encode_state,
1845                             struct intel_encoder_context *encoder_context)
1846 {
1847     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1848     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1849     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1850     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
1851     unsigned int value, inter, intra;
1852     unsigned int rounding_value = 0;
1853     unsigned int size = 0;
1854     int i = 0;
1855     int col = 0;
1856     unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
1857
1858     value = 0;
1859     inter = 0;
1860     intra = 0;
1861
1862     size = AVC_QP_MAX * 2 * sizeof(unsigned int);
1863     switch (generic_state->frame_type) {
1864     case SLICE_TYPE_I:
1865         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
1866         break;
1867     case SLICE_TYPE_P:
1868         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
1869         break;
1870     case SLICE_TYPE_B:
1871         memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
1872         break;
1873     default:
1874         assert(0);
1875         break;
1876     }
1877
1878     for (i = 0; i < AVC_QP_MAX ; i++) {
1879         for (col = 0; col < 2; col++) {
1880             value = *(lambda_table + i * 2 + col);
1881             intra = value >> 16;
1882
1883             if (intra < GEN95_AVC_MAX_LAMBDA) {
1884                 if (intra == 0xfffa) {
1885                     intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
1886                 }
1887             }
1888
1889             intra = intra << 16;
1890             inter = value & 0xffff;
1891
1892             if (inter < GEN95_AVC_MAX_LAMBDA) {
1893                 if (inter == 0xffef) {
1894                     if (generic_state->frame_type == SLICE_TYPE_P) {
1895                         if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
1896                             rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
1897                         else
1898                             rounding_value = avc_state->rounding_inter_p;
1899                     } else if (generic_state->frame_type == SLICE_TYPE_B) {
1900                         if (pic_param->pic_fields.bits.reference_pic_flag) {
1901                             if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
1902                                 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
1903                             else
1904                                 rounding_value = avc_state->rounding_inter_b_ref;
1905                         } else {
1906                             if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
1907                                 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
1908                             else
1909                                 rounding_value = avc_state->rounding_inter_b;
1910                         }
1911                     }
1912                 }
1913                 inter = 0xf000 + rounding_value;
1914             }
1915             *(lambda_table + i * 2 + col) = intra + inter;
1916         }
1917     }
1918 }
1919
/*
 * Fill the BRC constant-data surface for the Gen9+ path.
 *
 * The surface is written sequentially; all sizes and offsets below are
 * in bytes:
 *   - QP adjustment / distortion threshold / max-frame threshold /
 *     distortion QP adjustment tables
 *   - 128B skip-value thresholds (per frame type)
 *   - 32+32+32+160B ref-list area
 *   - 1664B mode/MV cost table (with optional per-QP patches)
 *   - 128B ref cost table
 *   - 64B intra scaling factors (+ KBL/GLK-only lambda/FTQ25 tables)
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table (128 bytes); I frames keep the zeroed table */
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optionally override the odd bytes (one per QP) with the
     * user-provided non-FTQ skip thresholds. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list: L0 at offset 0, L1 at offset 64; the
     * 32-byte areas are pre-filled with 0xff for unused entries */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            /* NOTE(review): stores the remapped ref index, not a QP,
             * despite the section name — confirm intended. */
            *(data + i) = avc_state->list_ref_idx[0][i];
        }
    }
    break;
    case SLICE_TYPE_B: {
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            /* NOTE(review): stores the remapped ref index — see above. */
            *(data + i) = avc_state->list_ref_idx[1][i];
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            /* NOTE(review): stores the remapped ref index — see above. */
            *(data + i) = avc_state->list_ref_idx[0][i];
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost (1664 bytes)*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Optionally patch byte 3 of each 16-byte entry with the legacy
     * intra mode cost (the unsigned int cast truncates to one byte). */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Optionally patch bytes 24,25,27..31 of each 32-byte entry with the
     * user-provided FTQ skip threshold for that QP. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost (128 bytes)*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor (64 bytes)*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* KBL/GLK additionally append the TQ lambda table (512 bytes) and
     * the FTQ25 table (64 bytes). */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2076
2077 static void
2078 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2079                                  struct encode_state *encode_state,
2080                                  struct intel_encoder_context *encoder_context)
2081 {
2082     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2083     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2084     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2085     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2086
2087     struct i965_gpe_resource *gpe_resource = NULL;
2088     unsigned int * data = NULL;
2089     unsigned int * data_tmp = NULL;
2090     unsigned int size = 0;
2091     unsigned int table_idx = 0;
2092     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2093     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2094     int i = 0;
2095
2096     gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2097     assert(gpe_resource);
2098
2099     i965_zero_gpe_resource(gpe_resource);
2100
2101     data = i965_map_gpe_resource(gpe_resource);
2102     assert(data);
2103
2104     table_idx = slice_type_kernel[generic_state->frame_type];
2105
2106     /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2107     size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2108     memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2109
2110     data += size;
2111
2112     /* skip threshold table*/
2113     size = 128;
2114     switch (generic_state->frame_type) {
2115     case SLICE_TYPE_P:
2116         memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2117         break;
2118     case SLICE_TYPE_B:
2119         memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2120         break;
2121     default:
2122         /*SLICE_TYPE_I,no change */
2123         break;
2124     }
2125
2126     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2127         for (i = 0; i < AVC_QP_MAX ; i++) {
2128             *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2129         }
2130     }
2131     data += size;
2132
2133     /*fill the qp for ref list*/
2134     size = 128;
2135     data += size;
2136     size = 128;
2137     data += size;
2138
2139     /*mv cost and mode cost*/
2140     size = 1664;
2141     memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
2142
2143     if (avc_state->old_mode_cost_enable) {
2144         data_tmp = data;
2145         for (i = 0; i < AVC_QP_MAX ; i++) {
2146             *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
2147             data_tmp += 16;
2148         }
2149     }
2150
2151     if (avc_state->ftq_skip_threshold_lut_input_enable) {
2152         for (i = 0; i < AVC_QP_MAX ; i++) {
2153             *(data + (i * 32) + 24) =
2154                 *(data + (i * 32) + 25) =
2155                     *(data + (i * 32) + 27) =
2156                         *(data + (i * 32) + 28) =
2157                             *(data + (i * 32) + 29) =
2158                                 *(data + (i * 32) + 30) =
2159                                     *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2160         }
2161
2162     }
2163     data += size;
2164
2165     /*ref cost*/
2166     size = 128;
2167     memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2168
2169     i965_unmap_gpe_resource(gpe_resource);
2170 }
2171 static void
2172 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2173                                   struct encode_state *encode_state,
2174                                   struct i965_gpe_context *gpe_context,
2175                                   struct intel_encoder_context *encoder_context,
2176                                   void * param)
2177 {
2178     gen9_avc_brc_init_reset_curbe_data *cmd;
2179     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2180     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2181     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2182     double input_bits_per_frame = 0;
2183     double bps_ratio = 0;
2184     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2185     struct avc_param common_param;
2186
2187     cmd = i965_gpe_context_map_curbe(gpe_context);
2188
2189     if (!cmd)
2190         return;
2191
2192     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2193
2194     memset(&common_param, 0, sizeof(common_param));
2195     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2196     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2197     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2198     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2199     common_param.frames_per_100s = generic_state->frames_per_100s;
2200     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2201     common_param.target_bit_rate = generic_state->target_bit_rate;
2202
2203     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2204     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2205     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2206     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2207     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2208     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2209     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2210     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2211     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2212     cmd->dw12.no_slices = avc_state->slice_num;
2213
2214     //VUI
2215     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2216         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2217         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2218             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2219
2220         }
2221
2222     }
2223     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2224     cmd->dw7.frame_rate_d = 100;
2225     cmd->dw8.brc_flag = 0;
2226     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2227
2228
2229     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2230         //CBR
2231         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2232         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2233
2234     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2235         //VBR
2236         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2237             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2238         }
2239         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2240
2241     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2242         //AVBR
2243         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2244         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2245
2246     }
2247     //igonre icq/vcm/qvbr
2248
2249     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2250     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2251
2252     //frame bits
2253     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2254
2255     if (cmd->dw2.buf_size_in_bits == 0) {
2256         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2257     }
2258
2259     if (cmd->dw1.init_buf_full_in_bits == 0) {
2260         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2261     }
2262     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2263         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2264     }
2265     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2266         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2267     }
2268
2269     //AVBR
2270     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2271         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2272         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2273
2274     }
2275
2276     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2277     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2278
2279
2280     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2281     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2282     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2283     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2284     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2285     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2286     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2287     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2288     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2289     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2290     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2291     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2292     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2293     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2294     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2295     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2296     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2297     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2298     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2299     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2300     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2301     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2302     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2303     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2304
2305     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2306
2307     i965_gpe_context_unmap_curbe(gpe_context);
2308
2309     return;
2310 }
2311
2312 static void
2313 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2314                                      struct encode_state *encode_state,
2315                                      struct i965_gpe_context *gpe_context,
2316                                      struct intel_encoder_context *encoder_context,
2317                                      void * param_mbenc)
2318 {
2319     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2320     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2321
2322     gen9_add_buffer_gpe_surface(ctx,
2323                                 gpe_context,
2324                                 &avc_ctx->res_brc_history_buffer,
2325                                 0,
2326                                 avc_ctx->res_brc_history_buffer.size,
2327                                 0,
2328                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2329
2330     gen9_add_buffer_2d_gpe_surface(ctx,
2331                                    gpe_context,
2332                                    &avc_ctx->res_brc_dist_data_surface,
2333                                    1,
2334                                    I965_SURFACEFORMAT_R8_UNORM,
2335                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2336
2337     return;
2338 }
2339
2340 static VAStatus
2341 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2342                                struct encode_state *encode_state,
2343                                struct intel_encoder_context *encoder_context)
2344 {
2345     struct i965_driver_data *i965 = i965_driver_data(ctx);
2346     struct i965_gpe_table *gpe = &i965->gpe_table;
2347     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2348     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2349     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2350     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2351
2352     struct i965_gpe_context *gpe_context;
2353     struct gpe_media_object_parameter media_object_param;
2354     struct gpe_media_object_inline_data media_object_inline_data;
2355     int media_function = 0;
2356     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2357
2358     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2359
2360     if (generic_state->brc_inited)
2361         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2362
2363     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2364
2365     gpe->context_init(ctx, gpe_context);
2366     gpe->reset_binding_table(ctx, gpe_context);
2367
2368     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2369
2370     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2371
2372     gpe->setup_interface_data(ctx, gpe_context);
2373
2374     memset(&media_object_param, 0, sizeof(media_object_param));
2375     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2376     media_object_param.pinline_data = &media_object_inline_data;
2377     media_object_param.inline_size = sizeof(media_object_inline_data);
2378
2379     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2380                                      gpe_context,
2381                                      media_function,
2382                                      &media_object_param);
2383
2384     return VA_STATUS_SUCCESS;
2385 }
2386
2387 static void
2388 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2389                                     struct encode_state *encode_state,
2390                                     struct i965_gpe_context *gpe_context,
2391                                     struct intel_encoder_context *encoder_context,
2392                                     void * param)
2393 {
2394     gen9_avc_frame_brc_update_curbe_data *cmd;
2395     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2396     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2397     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2398     struct object_surface *obj_surface;
2399     struct gen9_surface_avc *avc_priv_surface;
2400     struct avc_param common_param;
2401     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2402
2403     obj_surface = encode_state->reconstructed_object;
2404
2405     if (!obj_surface || !obj_surface->private_data)
2406         return;
2407     avc_priv_surface = obj_surface->private_data;
2408
2409     cmd = i965_gpe_context_map_curbe(gpe_context);
2410
2411     if (!cmd)
2412         return;
2413
2414     memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
2415
2416     cmd->dw5.target_size_flag = 0 ;
2417     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2418         /*overflow*/
2419         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2420         cmd->dw5.target_size_flag = 1 ;
2421     }
2422
2423     if (generic_state->skip_frame_enbale) {
2424         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2425         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2426
2427         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2428
2429     }
2430     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2431     cmd->dw1.frame_number = generic_state->seq_frame_number ;
2432     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2433     cmd->dw5.cur_frame_type = generic_state->frame_type ;
2434     cmd->dw5.brc_flag = 0 ;
2435     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2436
2437     if (avc_state->multi_pre_enable) {
2438         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2439         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2440     }
2441
2442     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
2443     if (avc_state->min_max_qp_enable) {
2444         switch (generic_state->frame_type) {
2445         case SLICE_TYPE_I:
2446             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2447             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2448             break;
2449         case SLICE_TYPE_P:
2450             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2451             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2452             break;
2453         case SLICE_TYPE_B:
2454             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2455             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2456             break;
2457         }
2458     } else {
2459         cmd->dw6.minimum_qp = 0 ;
2460         cmd->dw6.maximum_qp = 0 ;
2461     }
2462     cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2463     cmd->dw6.enable_sliding_window = 0 ;
2464
2465     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
2466
2467     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2468         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
2469         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
2470         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
2471         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
2472         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2473         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2474         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2475         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2476         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2477         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2478
2479     }
2480     cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2481
2482     memset(&common_param, 0, sizeof(common_param));
2483     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2484     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2485     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2486     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2487     common_param.frames_per_100s = generic_state->frames_per_100s;
2488     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2489     common_param.target_bit_rate = generic_state->target_bit_rate;
2490
2491     cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2492     i965_gpe_context_unmap_curbe(gpe_context);
2493
2494     return;
2495 }
2496
2497 static void
2498 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2499                                        struct encode_state *encode_state,
2500                                        struct i965_gpe_context *gpe_context,
2501                                        struct intel_encoder_context *encoder_context,
2502                                        void * param_brc)
2503 {
2504     struct i965_driver_data *i965 = i965_driver_data(ctx);
2505     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2506     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2507     struct brc_param * param = (struct brc_param *)param_brc ;
2508     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2509     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2510     unsigned char is_g95 = 0;
2511
2512     if (IS_SKL(i965->intel.device_info) ||
2513         IS_BXT(i965->intel.device_info))
2514         is_g95 = 0;
2515     else if (IS_KBL(i965->intel.device_info) ||
2516              IS_GLK(i965->intel.device_info))
2517         is_g95 = 1;
2518
2519     /* brc history buffer*/
2520     gen9_add_buffer_gpe_surface(ctx,
2521                                 gpe_context,
2522                                 &avc_ctx->res_brc_history_buffer,
2523                                 0,
2524                                 avc_ctx->res_brc_history_buffer.size,
2525                                 0,
2526                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2527
2528     /* previous pak buffer*/
2529     gen9_add_buffer_gpe_surface(ctx,
2530                                 gpe_context,
2531                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2532                                 0,
2533                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2534                                 0,
2535                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2536
2537     /* image state command buffer read only*/
2538     gen9_add_buffer_gpe_surface(ctx,
2539                                 gpe_context,
2540                                 &avc_ctx->res_brc_image_state_read_buffer,
2541                                 0,
2542                                 avc_ctx->res_brc_image_state_read_buffer.size,
2543                                 0,
2544                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2545
2546     /* image state command buffer write only*/
2547     gen9_add_buffer_gpe_surface(ctx,
2548                                 gpe_context,
2549                                 &avc_ctx->res_brc_image_state_write_buffer,
2550                                 0,
2551                                 avc_ctx->res_brc_image_state_write_buffer.size,
2552                                 0,
2553                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2554
2555     if (avc_state->mbenc_brc_buffer_size > 0) {
2556         gen9_add_buffer_gpe_surface(ctx,
2557                                     gpe_context,
2558                                     &(avc_ctx->res_mbenc_brc_buffer),
2559                                     0,
2560                                     avc_ctx->res_mbenc_brc_buffer.size,
2561                                     0,
2562                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2563     } else {
2564         /*  Mbenc curbe input buffer */
2565         gen9_add_dri_buffer_gpe_surface(ctx,
2566                                         gpe_context,
2567                                         gpe_context_mbenc->dynamic_state.bo,
2568                                         0,
2569                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2570                                         gpe_context_mbenc->curbe.offset,
2571                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2572         /* Mbenc curbe output buffer */
2573         gen9_add_dri_buffer_gpe_surface(ctx,
2574                                         gpe_context,
2575                                         gpe_context_mbenc->dynamic_state.bo,
2576                                         0,
2577                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2578                                         gpe_context_mbenc->curbe.offset,
2579                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2580     }
2581
2582     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2583     gen9_add_buffer_2d_gpe_surface(ctx,
2584                                    gpe_context,
2585                                    &avc_ctx->res_brc_dist_data_surface,
2586                                    1,
2587                                    I965_SURFACEFORMAT_R8_UNORM,
2588                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2589
2590     /* BRC const data 2D surface buffer */
2591     gen9_add_buffer_2d_gpe_surface(ctx,
2592                                    gpe_context,
2593                                    &avc_ctx->res_brc_const_data_buffer,
2594                                    1,
2595                                    I965_SURFACEFORMAT_R8_UNORM,
2596                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2597
2598     /* MB statistical data surface*/
2599     gen9_add_buffer_gpe_surface(ctx,
2600                                 gpe_context,
2601                                 &avc_ctx->res_mb_status_buffer,
2602                                 0,
2603                                 avc_ctx->res_mb_status_buffer.size,
2604                                 0,
2605                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2606
2607     return;
2608 }
2609
/* Run the per-frame BRC update kernel.
 *
 * This first programs the mbenc curbe (the BRC kernel may rewrite it),
 * then prepares and dispatches the BRC frame-update kernel itself.
 * Returns VA_STATUS_SUCCESS unconditionally. */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    /* NOTE(review): brc_enabled stays 0 and is forwarded into the mbenc
     * curbe below even though this is a BRC path — confirm intentional. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* Dirty-ROI is hard-disabled here: the trailing "(0)" forces this to 0. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    /* Decide which auxiliary mbenc buffers the curbe must reference. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Select the mbenc I-frame kernel for the configured quality mode;
     * adjusted to P/B below by the +1/+2 offsets. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    /* Kernel table layout is I, P, B consecutive per quality mode. */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    /* gpe_context is reused from here on: it now points at the BRC
     * frame-update context, while the mbenc context is kept reachable
     * through curbe_brc_param.gpe_context_mbenc. */
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct*/
    gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* Dispatch as a single media object with zeroed inline data. */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
2730
2731 static void
2732 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2733                                  struct encode_state *encode_state,
2734                                  struct i965_gpe_context *gpe_context,
2735                                  struct intel_encoder_context *encoder_context,
2736                                  void * param)
2737 {
2738     gen9_avc_mb_brc_curbe_data *cmd;
2739     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2740     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2741
2742     cmd = i965_gpe_context_map_curbe(gpe_context);
2743
2744     if (!cmd)
2745         return;
2746
2747     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2748
2749     cmd->dw0.cur_frame_type = generic_state->frame_type;
2750     if (generic_state->brc_roi_enable) {
2751         cmd->dw0.enable_roi = 1;
2752     } else {
2753         cmd->dw0.enable_roi = 0;
2754     }
2755
2756     i965_gpe_context_unmap_curbe(gpe_context);
2757
2758     return;
2759 }
2760
2761 static void
2762 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2763                                     struct encode_state *encode_state,
2764                                     struct i965_gpe_context *gpe_context,
2765                                     struct intel_encoder_context *encoder_context,
2766                                     void * param_mbenc)
2767 {
2768     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2769     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2770     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2771
2772     /* brc history buffer*/
2773     gen9_add_buffer_gpe_surface(ctx,
2774                                 gpe_context,
2775                                 &avc_ctx->res_brc_history_buffer,
2776                                 0,
2777                                 avc_ctx->res_brc_history_buffer.size,
2778                                 0,
2779                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2780
2781     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2782     if (generic_state->mb_brc_enabled) {
2783         gen9_add_buffer_2d_gpe_surface(ctx,
2784                                        gpe_context,
2785                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2786                                        1,
2787                                        I965_SURFACEFORMAT_R8_UNORM,
2788                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2789
2790     }
2791
2792     /* BRC roi feature*/
2793     if (generic_state->brc_roi_enable) {
2794         gen9_add_buffer_gpe_surface(ctx,
2795                                     gpe_context,
2796                                     &avc_ctx->res_mbbrc_roi_surface,
2797                                     0,
2798                                     avc_ctx->res_mbbrc_roi_surface.size,
2799                                     0,
2800                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2801
2802     }
2803
2804     /* MB statistical data surface*/
2805     gen9_add_buffer_gpe_surface(ctx,
2806                                 gpe_context,
2807                                 &avc_ctx->res_mb_status_buffer,
2808                                 0,
2809                                 avc_ctx->res_mb_status_buffer.size,
2810                                 0,
2811                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
2812
2813     return;
2814 }
2815
2816 static VAStatus
2817 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2818                               struct encode_state *encode_state,
2819                               struct intel_encoder_context *encoder_context)
2820
2821 {
2822     struct i965_driver_data *i965 = i965_driver_data(ctx);
2823     struct i965_gpe_table *gpe = &i965->gpe_table;
2824     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2825     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2826     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2827     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2828
2829     struct i965_gpe_context *gpe_context;
2830     struct gpe_media_object_walker_parameter media_object_walker_param;
2831     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2832     int media_function = 0;
2833     int kernel_idx = 0;
2834
2835     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2836     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2837     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2838
2839     gpe->context_init(ctx, gpe_context);
2840     gpe->reset_binding_table(ctx, gpe_context);
2841
2842     /* set curbe brc mb update*/
2843     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2844
2845
2846     /* set surface brc mb update*/
2847     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2848
2849
2850     gpe->setup_interface_data(ctx, gpe_context);
2851
2852     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2853     /* the scaling is based on 8x8 blk level */
2854     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2855     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
2856     kernel_walker_param.no_dependency = 1;
2857
2858     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2859
2860     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2861                                             gpe_context,
2862                                             media_function,
2863                                             &media_object_walker_param);
2864
2865     return VA_STATUS_SUCCESS;
2866 }
2867
2868 /*
2869 mbenc kernel related function,it include intra dist kernel
2870 */
2871 static int
2872 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2873 {
2874     int biweight = 32;      // default value
2875
2876     /* based on kernel HLD*/
2877     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
2878         biweight = 32;
2879     } else {
2880         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2881
2882         if (biweight != 16 && biweight != 21 &&
2883             biweight != 32 && biweight != 43 && biweight != 48) {
2884             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
2885         }
2886     }
2887
2888     return biweight;
2889 }
2890
2891 static void
2892 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2893                                struct encode_state *encode_state,
2894                                struct intel_encoder_context *encoder_context)
2895 {
2896     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2897     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2898     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2899     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
2900
2901     int max_num_references;
2902     VAPictureH264 *curr_pic;
2903     VAPictureH264 *ref_pic_l0;
2904     VAPictureH264 *ref_pic_l1;
2905     int i = 0;
2906     int tb = 0;
2907     int td = 0;
2908     int tx = 0;
2909     int tmp = 0;
2910     int poc0 = 0;
2911     int poc1 = 0;
2912
2913     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2914
2915     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2916     curr_pic = &pic_param->CurrPic;
2917     for (i = 0; i < max_num_references; i++) {
2918         ref_pic_l0 = &(slice_param->RefPicList0[i]);
2919
2920         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2921             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2922             break;
2923         ref_pic_l1 = &(slice_param->RefPicList1[0]);
2924         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2925             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2926             break;
2927
2928         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2929         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2930         CLIP(poc0, -128, 127);
2931         CLIP(poc1, -128, 127);
2932         tb = poc0;
2933         td = poc1;
2934
2935         if (td == 0) {
2936             td = 1;
2937         }
2938         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2939         tx = (16384 + tmp) / td ;
2940         tmp = (tb * tx + 32) >> 6;
2941         CLIP(tmp, -1024, 1023);
2942         avc_state->dist_scale_factor_list0[i] = tmp;
2943     }
2944     return;
2945 }
2946
2947 static unsigned int
2948 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2949                               VAEncSliceParameterBufferH264 *slice_param,
2950                               int list,
2951                               int ref_frame_idx)
2952 {
2953     struct i965_driver_data *i965 = i965_driver_data(ctx);
2954     struct object_surface *obj_surface;
2955     struct gen9_surface_avc *avc_priv_surface;
2956     VASurfaceID surface_id;
2957
2958     assert(slice_param);
2959     assert(list < 2);
2960
2961     if (list == 0) {
2962         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2963             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2964         else
2965             return 0;
2966     } else {
2967         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2968             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2969         else
2970             return 0;
2971     }
2972     obj_surface = SURFACE(surface_id);
2973     if (obj_surface && obj_surface->private_data) {
2974         avc_priv_surface = obj_surface->private_data;
2975         return avc_priv_surface->qp_value;
2976     } else {
2977         return 0;
2978     }
2979 }
2980
2981 static void
2982 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
2983                                 struct encode_state *encode_state,
2984                                 struct intel_encoder_context *encoder_context)
2985 {
2986     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2987     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2988     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2989     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2990
2991     struct i965_gpe_resource *gpe_resource = NULL;
2992     unsigned int * data = NULL;
2993     unsigned int * data_tmp = NULL;
2994     unsigned int size = 16 * 52;
2995     unsigned int table_idx = 0;
2996     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2997     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2998     int i = 0;
2999
3000     gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3001     assert(gpe_resource);
3002     data = i965_map_gpe_resource(gpe_resource);
3003     assert(data);
3004
3005     table_idx = slice_type_kernel[generic_state->frame_type];
3006
3007     memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3008
3009     data_tmp = data;
3010
3011     switch (generic_state->frame_type) {
3012     case SLICE_TYPE_I:
3013         for (i = 0; i < AVC_QP_MAX ; i++) {
3014             if (avc_state->old_mode_cost_enable)
3015                 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
3016             data += 16;
3017         }
3018         break;
3019     case SLICE_TYPE_P:
3020     case SLICE_TYPE_B:
3021         for (i = 0; i < AVC_QP_MAX ; i++) {
3022             if (generic_state->frame_type == SLICE_TYPE_P) {
3023                 if (avc_state->skip_bias_adjustment_enable)
3024                     *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
3025             }
3026             if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3027                 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3028             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3029                 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3030             } else {
3031                 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
3032             }
3033
3034             if (avc_state->adaptive_intra_scaling_enable) {
3035                 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3036             } else {
3037                 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
3038
3039             }
3040             data += 16;
3041
3042         }
3043         break;
3044     default:
3045         assert(0);
3046     }
3047
3048     data = data_tmp;
3049     for (i = 0; i < AVC_QP_MAX ; i++) {
3050         if (avc_state->ftq_skip_threshold_lut_input_enable) {
3051             *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3052                            (avc_state->ftq_skip_threshold_lut[i] << 16) |
3053                            (avc_state->ftq_skip_threshold_lut[i] << 24));
3054             *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3055                            (avc_state->ftq_skip_threshold_lut[i] << 8) |
3056                            (avc_state->ftq_skip_threshold_lut[i] << 16) |
3057                            (avc_state->ftq_skip_threshold_lut[i] << 24));
3058         }
3059
3060         if (avc_state->kernel_trellis_enable) {
3061             *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3062             *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3063
3064         }
3065         data += 16;
3066
3067     }
3068     i965_unmap_gpe_resource(gpe_resource);
3069 }
3070
3071 static void
3072 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3073                          struct encode_state *encode_state,
3074                          struct i965_gpe_context *gpe_context,
3075                          struct intel_encoder_context *encoder_context,
3076                          void * param)
3077 {
3078     struct i965_driver_data *i965 = i965_driver_data(ctx);
3079     union {
3080         gen9_avc_mbenc_curbe_data *g9;
3081         gen95_avc_mbenc_curbe_data *g95;
3082     } cmd;
3083     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3084     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3085     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3086
3087     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3088     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3089     VASurfaceID surface_id;
3090     struct object_surface *obj_surface;
3091
3092     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3093     unsigned char qp = 0;
3094     unsigned char me_method = 0;
3095     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3096     unsigned int table_idx = 0;
3097     unsigned char is_g9 = 0;
3098     unsigned char is_g95 = 0;
3099     unsigned int curbe_size = 0;
3100
3101     unsigned int preset = generic_state->preset;
3102     if (IS_SKL(i965->intel.device_info) ||
3103         IS_BXT(i965->intel.device_info)) {
3104         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3105         if (!cmd.g9)
3106             return;
3107         is_g9 = 1;
3108         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3109         memset(cmd.g9, 0, curbe_size);
3110
3111         if (mbenc_i_frame_dist_in_use) {
3112             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3113
3114         } else {
3115             switch (generic_state->frame_type) {
3116             case SLICE_TYPE_I:
3117                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3118                 break;
3119             case SLICE_TYPE_P:
3120                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3121                 break;
3122             case SLICE_TYPE_B:
3123                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3124                 break;
3125             default:
3126                 assert(0);
3127             }
3128
3129         }
3130     } else if (IS_KBL(i965->intel.device_info) ||
3131                IS_GLK(i965->intel.device_info)) {
3132         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3133         if (!cmd.g95)
3134             return;
3135         is_g95 = 1;
3136         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3137         memset(cmd.g9, 0, curbe_size);
3138
3139         if (mbenc_i_frame_dist_in_use) {
3140             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3141
3142         } else {
3143             switch (generic_state->frame_type) {
3144             case SLICE_TYPE_I:
3145                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3146                 break;
3147             case SLICE_TYPE_P:
3148                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3149                 break;
3150             case SLICE_TYPE_B:
3151                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3152                 break;
3153             default:
3154                 assert(0);
3155             }
3156
3157         }
3158     }
3159
3160     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3161     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3162
3163     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3164     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3165     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3166     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3167
3168     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3169     cmd.g9->dw38.max_len_sp = 0;
3170
3171     if (is_g95)
3172         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3173
3174     cmd.g9->dw3.src_access = 0;
3175     cmd.g9->dw3.ref_access = 0;
3176
3177     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3178         //disable ftq_override by now.
3179         if (avc_state->ftq_override) {
3180             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3181
3182         } else {
3183             // both gen9 and gen95 come here by now
3184             if (generic_state->frame_type == SLICE_TYPE_P) {
3185                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3186
3187             } else {
3188                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3189             }
3190         }
3191     } else {
3192         cmd.g9->dw3.ftq_enable = 0;
3193     }
3194
3195     if (avc_state->disable_sub_mb_partion)
3196         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3197
3198     if (mbenc_i_frame_dist_in_use) {
3199         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3200         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3201         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3202         cmd.g9->dw6.batch_buffer_end = 0;
3203         cmd.g9->dw31.intra_compute_type = 1;
3204
3205     } else {
3206         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3207         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3208         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3209
3210         {
3211             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3212             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3213                 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3214             } else if (avc_state->skip_bias_adjustment_enable) {
3215                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3216                 // No need to check for P picture as the flag is only enabled for P picture */
3217                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3218
3219             }
3220         }
3221
3222         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3223         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3224     }
3225     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3226     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3227     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3228     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3229     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3230     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3231     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3232     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3233
3234
3235     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3236     cmd.g9->dw7.src_field_polarity = 0;//field related
3237
3238     /*ftq_skip_threshold_lut set,dw14 /15*/
3239
3240     /*r5 disable NonFTQSkipThresholdLUT*/
3241     if (generic_state->frame_type == SLICE_TYPE_P) {
3242         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3243
3244     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3245         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3246
3247     }
3248
3249     cmd.g9->dw13.qp_prime_y = qp;
3250     cmd.g9->dw13.qp_prime_cb = qp;
3251     cmd.g9->dw13.qp_prime_cr = qp;
3252     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3253
3254     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3255         switch (gen9_avc_multi_pred[preset]) {
3256         case 0:
3257             cmd.g9->dw32.mult_pred_l0_disable = 128;
3258             cmd.g9->dw32.mult_pred_l1_disable = 128;
3259             break;
3260         case 1:
3261             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3262             cmd.g9->dw32.mult_pred_l1_disable = 128;
3263             break;
3264         case 2:
3265             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3266             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3267             break;
3268         case 3:
3269             cmd.g9->dw32.mult_pred_l0_disable = 1;
3270             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3271             break;
3272
3273         }
3274
3275     } else {
3276         cmd.g9->dw32.mult_pred_l0_disable = 128;
3277         cmd.g9->dw32.mult_pred_l1_disable = 128;
3278     }
3279
3280     /*field setting for dw33 34, ignored*/
3281
3282     if (avc_state->adaptive_transform_decision_enable) {
3283         if (generic_state->frame_type != SLICE_TYPE_I) {
3284             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3285             if (is_g95) {
3286                 cmd.g95->dw60.mb_texture_threshold = 1024;
3287                 cmd.g95->dw60.tx_decision_threshold = 128;
3288             }
3289
3290         }
3291
3292         if (is_g9) {
3293             cmd.g9->dw58.mb_texture_threshold = 1024;
3294             cmd.g9->dw58.tx_decision_threshold = 128;
3295         }
3296     }
3297
3298
3299     if (generic_state->frame_type == SLICE_TYPE_B) {
3300         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3301         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3302         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3303     }
3304
3305     cmd.g9->dw34.b_original_bff = 0; //frame only
3306     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3307     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3308     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3309     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3310     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3311     if (is_g95) {
3312         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3313         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3314     }
3315
3316     if (is_g9) {
3317         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3318
3319         if (cmd.g9->dw34.force_non_skip_check) {
3320             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3321         }
3322     }
3323
3324
3325     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3326     cmd.g9->dw38.ref_threshold = 400;
3327     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3328
3329     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3330        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3331        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3332     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3333
3334     if (mbenc_i_frame_dist_in_use) {
3335         cmd.g9->dw13.qp_prime_y = 0;
3336         cmd.g9->dw13.qp_prime_cb = 0;
3337         cmd.g9->dw13.qp_prime_cr = 0;
3338         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3339         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3340         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3341
3342     }
3343     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3344         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3345         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3346         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3347         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3348         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3349         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3350         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3351         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3352         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3353         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3354     }
3355
3356     table_idx = slice_type_kernel[generic_state->frame_type];
3357     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3358
3359     if (generic_state->frame_type == SLICE_TYPE_I) {
3360         cmd.g9->dw0.skip_mode_enable = 0;
3361         cmd.g9->dw37.skip_mode_enable = 0;
3362         cmd.g9->dw36.hme_combine_overlap = 0;
3363         cmd.g9->dw47.intra_cost_sf = 16;
3364         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3365         if (is_g9)
3366             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3367
3368     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3369         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3370         cmd.g9->dw3.bme_disable_fbr = 1;
3371         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3372         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3373         cmd.g9->dw7.non_skip_zmv_added = 1;
3374         cmd.g9->dw7.non_skip_mode_added = 1;
3375         cmd.g9->dw7.skip_center_mask = 1;
3376         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3377         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3378         cmd.g9->dw36.hme_combine_overlap = 1;
3379         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3380         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3381         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3382         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3383         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3384         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3385             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3386
3387     } else {
3388         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3389         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3390         cmd.g9->dw3.search_ctrl = 7;
3391         cmd.g9->dw3.skip_type = 1;
3392         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3393         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3394         cmd.g9->dw7.skip_center_mask = 0xff;
3395         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3396         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3397         cmd.g9->dw36.hme_combine_overlap = 1;
3398         surface_id = slice_param->RefPicList1[0].picture_id;
3399         obj_surface = SURFACE(surface_id);
3400         if (!obj_surface) {
3401             WARN_ONCE("Invalid backward reference frame\n");
3402             return;
3403         }
3404         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3405
3406         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3407         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3408         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3409         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3410         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3411         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3412         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3413         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3414         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3415         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3416         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3417         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3418
3419         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3420         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3421             cmd.g9->dw7.non_skip_zmv_added = 1;
3422             cmd.g9->dw7.non_skip_mode_added = 1;
3423         }
3424
3425         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3426         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3427             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3428
3429     }
3430
3431     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3432
3433     if (avc_state->rolling_intra_refresh_enable) {
3434         /*by now disable it*/
3435         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3436         cmd.g9->dw32.mult_pred_l0_disable = 128;
3437         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3438          across one P frame to another P frame, as needed by the RollingI algo */
3439         if (is_g9) {
3440             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3441             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3442             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3443         }
3444
3445         if (is_g95) {
3446             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3447                 cmd.g95->dw4.enable_intra_refresh = 0;
3448                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3449                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3450                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3451             } else {
3452                 cmd.g95->dw4.enable_intra_refresh = 1;
3453                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3454                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3455                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3456                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3457                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3458             }
3459         }
3460
3461     } else {
3462         cmd.g9->dw34.widi_intra_refresh_en = 0;
3463     }
3464
3465     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3466     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3467
3468     /*roi set disable by now. 49-56*/
3469     if (curbe_param->roi_enabled) {
3470         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3471         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3472         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3473         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3474
3475         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3476         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3477         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3478         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3479
3480         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3481         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3482         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3483         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3484
3485         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3486         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3487         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3488         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3489
3490         if (!generic_state->brc_enabled) {
3491             char tmp = 0;
3492             tmp = generic_state->roi[0].value;
3493             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3494             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3495             tmp = generic_state->roi[1].value;
3496             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3497             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3498             tmp = generic_state->roi[2].value;
3499             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3500             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3501             tmp = generic_state->roi[3].value;
3502             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3503             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3504         } else {
3505             cmd.g9->dw34.roi_enable_flag = 0;
3506         }
3507     }
3508
3509     if (is_g95) {
3510         if (avc_state->tq_enable) {
3511             if (generic_state->frame_type == SLICE_TYPE_I) {
3512                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3513                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3514
3515             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3516                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3517                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3518
3519             } else {
3520                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3521                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3522             }
3523
3524             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3525                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3526
3527             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3528                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3529
3530             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3531                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3532
3533             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3534                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3535         }
3536     }
3537
3538     if (is_g95) {
3539         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3540         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3541         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3542         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3543         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3544         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3545         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3546         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3547         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3548         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3549         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3550         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3551         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3552         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3553         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3554         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3555         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3556         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3557         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3558         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3559         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3560         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3561     }
3562
3563     if (is_g9) {
3564         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3565         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3566         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3567         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3568         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3569         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3570         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3571         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3572         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3573         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3574         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3575         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3576         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3577         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3578         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3579         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3580         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3581         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3582         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3583         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3584         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3585         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3586     }
3587
3588     i965_gpe_context_unmap_curbe(gpe_context);
3589
3590     return;
3591 }
3592
/*
 * Bind every input/output surface required by the MBEnc kernel (or the BRC
 * I-frame distortion kernel, when param->mbenc_i_frame_dist_in_use is set)
 * into the binding table of @gpe_context.
 *
 * The binding-table slot for each surface is fixed by the kernel ABI
 * (GEN9_AVC_MBENC_*_INDEX / GEN95_AVC_MBENC_*_INDEX); Gen9 (SKL/BXT) and
 * Gen9.5 (KBL/GLK) differ for a few slots, selected via is_g95 below.
 *
 * param_mbenc must point to a struct mbenc_param describing which optional
 * surfaces are in use for this pass.  Returns early (binding nothing further)
 * if a needed reconstructed/scaled surface is missing.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Pick the binding-table layout: SKL/BXT use the Gen9 indices,
     * KBL/GLK the Gen9.5 ones.  Other platforms leave is_g95 == 0. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /* PAK object command buffer output (16 DWs per MB) */
    size = frame_mb_size * 16 * 4;
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /* MV data buffer output (32 DWs per MB) */
    size = frame_mb_size * 32 * 4;
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /* Current input Y/UV source: the I-frame distortion pass reads the 4x
     * downscaled reconstructed surface, the normal pass reads the raw input. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /* 4x HME motion vector input */
        gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
        /* 4x HME distortion input */
        gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
    }

    /* MB BRC constant data buffer (per-QP lookup tables) */
    if (param->mb_const_data_buffer_in_use) {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /* Per-MB QP buffer: application-supplied map when mb_qp_data_enable,
     * otherwise the MB-BRC generated map. */
    if (param->mb_qp_buffer_in_use) {
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /* Current picture for VME, L0 side (same source selection as above) */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /* L0 reference YUV surfaces: odd slots after the current picture */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /* Current picture for VME, L1 side (same source selection again) */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            avc_priv_surface = obj_surface->private_data;
            /* Forward reference's PAK object (MB code) buffer, used for
             * direct/colocated prediction in B frames */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /* Forward reference's MV data buffer */
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer, output of the I-frame distortion pass */
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* RefPicSelect surface of the current picture (updated later when the
     * current picture is used as a reference) */
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }

    /* MB statistics slot: either the VPP MB status buffer or, failing that,
     * the flatness-check surface — both share GEN9_AVC_MBENC_MB_STATS_INDEX */
    if (param->mb_vproc_stats_enable) {
        /* MB status buffer input */
        size = frame_mb_size * 16 * 4;
        gpe_resource = &(avc_ctx->res_mb_status_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MB_STATS_INDEX);

    } else if (avc_state->flatness_check_enable) {

        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if (param->mad_enable) {
        /* MAD accumulator (single DW), zeroed before the kernel runs */
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /* BRC-updated MBEnc curbe data buffer; ignored by gen9, used on gen95 */
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /* Arbitrary number of MBs per slice: needs an explicit MB-to-slice map */
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /* slice map surface input */
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* The surfaces below are only consumed by the full MBEnc pass,
     * never by the I-frame distortion pass */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }

        /* Static frame detection cost table, P/B frames only */
        if (avc_state->sfd_enable && generic_state->hme_enabled) {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);

            } else if (generic_state->frame_type == SLICE_TYPE_B) {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
            }

            if (generic_state->frame_type != SLICE_TYPE_I) {
                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                               gpe_resource,
                                               1,
                                               I965_SURFACEFORMAT_R8_UNORM,
                                               (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
            }
        }
    }

    return;
}
3923
/*
 * Run the MBEnc kernel (or, when @i_frame_dist_in_use is true, the BRC
 * I-frame distortion kernel) for the current frame:
 *   - select the kernel variant from kernel_mode and frame type,
 *   - set up curbe data (unless the BRC update pass already did),
 *   - bind surfaces, then dispatch a media-object-walker over the MB grid.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty ROI is deliberately disabled for now — note the trailing (0) */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* Any of these features needs the MB-BRC constant data surface bound */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    /* Any of these features needs the per-MB QP surface bound */
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion pass: runs on the 4x downscaled picture,
         * with MAD and BRC disabled */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* Normal MBEnc: pick the I-frame kernel for the chosen quality
         * preset, then offset by frame type (I/P/B) below */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* Kernels are laid out I, P, B consecutively per preset */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* If the BRC update kernel already wrote this kernel's curbe, skip both
     * context init and curbe setup here to keep the BRC-updated values */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /* set curbe here */
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB BRC const data buffer set up */
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table; kernel-controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /* clear the MAD accumulator buffer */
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /* bind all kernel surfaces */
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /* Walker setting: one thread per MB.  The I-frame distortion pass has no
     * inter-MB dependency; the full pass walks 45/26 degrees so each MB's
     * neighbours are ready before it runs. */
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            /* temporal direct mode needs the extra 45-degree dependency */
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
4089
/*
 * ME (HME motion estimation) kernel related functions
 */
4093 static void
4094 gen9_avc_set_curbe_me(VADriverContextP ctx,
4095                       struct encode_state *encode_state,
4096                       struct i965_gpe_context *gpe_context,
4097                       struct intel_encoder_context *encoder_context,
4098                       void * param)
4099 {
4100     gen9_avc_me_curbe_data *curbe_cmd;
4101     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4102     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4103     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4104
4105     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4106
4107     struct me_param * curbe_param = (struct me_param *)param ;
4108     unsigned char  use_mv_from_prev_step = 0;
4109     unsigned char write_distortions = 0;
4110     unsigned char qp_prime_y = 0;
4111     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4112     unsigned char seach_table_idx = 0;
4113     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4114     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4115     unsigned int scale_factor = 0;
4116
4117     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4118     switch (curbe_param->hme_type) {
4119     case INTEL_ENC_HME_4x : {
4120         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4121         write_distortions = 1;
4122         mv_shift_factor = 2;
4123         scale_factor = 4;
4124         prev_mv_read_pos_factor = 0;
4125         break;
4126     }
4127     case INTEL_ENC_HME_16x : {
4128         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4129         write_distortions = 0;
4130         mv_shift_factor = 2;
4131         scale_factor = 16;
4132         prev_mv_read_pos_factor = 1;
4133         break;
4134     }
4135     case INTEL_ENC_HME_32x : {
4136         use_mv_from_prev_step = 0;
4137         write_distortions = 0;
4138         mv_shift_factor = 1;
4139         scale_factor = 32;
4140         prev_mv_read_pos_factor = 0;
4141         break;
4142     }
4143     default:
4144         assert(0);
4145
4146     }
4147     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4148
4149     if (!curbe_cmd)
4150         return;
4151
4152     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4153     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4154
4155     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4156
4157     curbe_cmd->dw3.sub_pel_mode = 3;
4158     if (avc_state->field_scaling_output_interleaved) {
4159         /*frame set to zero,field specified*/
4160         curbe_cmd->dw3.src_access = 0;
4161         curbe_cmd->dw3.ref_access = 0;
4162         curbe_cmd->dw7.src_field_polarity = 0;
4163     }
4164     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4165     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4166     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4167
4168     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4169     curbe_cmd->dw6.write_distortions = write_distortions;
4170     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4171     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4172
4173     if (generic_state->frame_type == SLICE_TYPE_B) {
4174         curbe_cmd->dw1.bi_weight = 32;
4175         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4176         me_method = gen9_avc_b_me_method[generic_state->preset];
4177         seach_table_idx = 1;
4178     }
4179
4180     if (generic_state->frame_type == SLICE_TYPE_P ||
4181         generic_state->frame_type == SLICE_TYPE_B)
4182         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4183
4184     curbe_cmd->dw13.ref_streamin_cost = 5;
4185     curbe_cmd->dw13.roi_enable = 0;
4186
4187     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4188     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4189
4190     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
4191
4192     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4193     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4194     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4195     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4196     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4197     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4198     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4199
4200     i965_gpe_context_unmap_curbe(gpe_context);
4201     return;
4202 }
4203
/*
 * Bind the input/output surfaces for one HME pass (4x/16x/32x).
 *
 * The downscaled luma surfaces live in the private data of
 * encode_state->reconstructed_object (scaled_{4,16,32}x_surface_obj),
 * both for the current picture and for every reference picture.
 * The current picture is bound at GEN9_AVC_ME_CURR_FOR_{FWD,BWD}_REF_INDEX
 * and each reference at the following odd slots (base + i*2 + 1), the
 * interleaved layout the VME hardware walker expects.
 */
static void
gen9_avc_send_surface_me(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface, *input_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct me_param * curbe_param = (struct me_param *)param ;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    int i = 0;

    /* all scaled input surface stored in reconstructed_object*/
    obj_surface = encode_state->reconstructed_object;
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;


    switch (curbe_param->hme_type) {
    case INTEL_ENC_HME_4x : {
        /*memv output 4x*/
        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /*memv input 16x: predictors produced by the coarser 16x pass*/
        if (generic_state->b16xme_enabled) {
            gpe_resource = &avc_ctx->s16x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
        }
        /* brc distortion  output*/
        gpe_resource = &avc_ctx->res_brc_dist_data_surface;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_BRC_DISTORTION_INDEX);
        /* memv distortion output*/
        gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
        /*input current down scaled YUV surface*/
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_4x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
        /*input ref scaled YUV surface: one per active L0 reference,
         * bound at the odd slots after the current picture.
         * NOTE(review): a missing/invalid reference breaks out of the
         * loop, leaving later slots unbound -- presumably the kernel
         * only reads the slots for valid references; confirm. */
        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_4x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        /* current picture again, as the base of the backward (L1) set */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_4x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_4x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;

    }
    case INTEL_ENC_HME_16x : {
        /* memv output 16x */
        gpe_resource = &avc_ctx->s16x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /* memv input 32x: predictors produced by the coarser 32x pass */
        if (generic_state->b32xme_enabled) {
            gpe_resource = &avc_ctx->s32x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
        }

        /* current and reference 16x-scaled surfaces, same layout as 4x */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_16x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_16x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_16x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_16x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;
    }
    case INTEL_ENC_HME_32x : {
        /* memv output 32x; coarsest pass, so no predictor input */
        gpe_resource = &avc_ctx->s32x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

        /* current and reference 32x-scaled surfaces, same layout as 4x */
        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_32x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_32x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
        }

        obj_surface = encode_state->reconstructed_object;
        avc_priv_surface = obj_surface->private_data;
        input_surface = avc_priv_surface->scaled_32x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 input_surface,
                                 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

        for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface || !obj_surface->private_data)
                break;
            avc_priv_surface = obj_surface->private_data;

            input_surface = avc_priv_surface->scaled_32x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
        }
        break;
    }
    default:
        assert(0);

    }
}
4431
4432 static VAStatus
4433 gen9_avc_kernel_me(VADriverContextP ctx,
4434                    struct encode_state *encode_state,
4435                    struct intel_encoder_context *encoder_context,
4436                    int hme_type)
4437 {
4438     struct i965_driver_data *i965 = i965_driver_data(ctx);
4439     struct i965_gpe_table *gpe = &i965->gpe_table;
4440     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4441     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4442     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4443     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4444
4445     struct i965_gpe_context *gpe_context;
4446     struct gpe_media_object_walker_parameter media_object_walker_param;
4447     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4448     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4449     int media_function = 0;
4450     int kernel_idx = 0;
4451     struct me_param param ;
4452     unsigned int scale_factor = 0;
4453
4454     switch (hme_type) {
4455     case INTEL_ENC_HME_4x : {
4456         media_function = INTEL_MEDIA_STATE_4X_ME;
4457         scale_factor = 4;
4458         break;
4459     }
4460     case INTEL_ENC_HME_16x : {
4461         media_function = INTEL_MEDIA_STATE_16X_ME;
4462         scale_factor = 16;
4463         break;
4464     }
4465     case INTEL_ENC_HME_32x : {
4466         media_function = INTEL_MEDIA_STATE_32X_ME;
4467         scale_factor = 32;
4468         break;
4469     }
4470     default:
4471         assert(0);
4472
4473     }
4474
4475     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4476     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4477
4478     /* I frame should not come here.*/
4479     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4480     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4481
4482     gpe->context_init(ctx, gpe_context);
4483     gpe->reset_binding_table(ctx, gpe_context);
4484
4485     /*set curbe*/
4486     memset(&param, 0, sizeof(param));
4487     param.hme_type = hme_type;
4488     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
4489
4490     /*send surface*/
4491     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4492
4493     gpe->setup_interface_data(ctx, gpe_context);
4494
4495     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4496     /* the scaling is based on 8x8 blk level */
4497     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4498     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4499     kernel_walker_param.no_dependency = 1;
4500
4501     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4502
4503     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4504                                             gpe_context,
4505                                             media_function,
4506                                             &media_object_walker_param);
4507
4508     return VA_STATUS_SUCCESS;
4509 }
4510
4511 /*
4512 wp related function
4513 */
4514 static void
4515 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4516                       struct encode_state *encode_state,
4517                       struct i965_gpe_context *gpe_context,
4518                       struct intel_encoder_context *encoder_context,
4519                       void * param)
4520 {
4521     gen9_avc_wp_curbe_data *cmd;
4522     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4523     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4524     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4525     struct wp_param * curbe_param = (struct wp_param *)param;
4526
4527     cmd = i965_gpe_context_map_curbe(gpe_context);
4528
4529     if (!cmd)
4530         return;
4531     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
4532     if (curbe_param->ref_list_idx) {
4533         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4534         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4535     } else {
4536         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4537         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4538     }
4539
4540     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4541     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4542
4543     i965_gpe_context_unmap_curbe(gpe_context);
4544
4545 }
4546
/*
 * Bind the input reference and the output scaled surface for the
 * weighted-prediction kernel, and record whether a usable reference
 * exists for the selected list in weighted_ref_l{0,1}_enable.
 */
static void
gen9_avc_send_surface_wp(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct wp_param * curbe_param = (struct wp_param *)param;
    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;

    /* Look up the first reference of the selected list and flag whether
     * weighted prediction can use it. */
    if (curbe_param->ref_list_idx) {
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            avc_state->weighted_ref_l1_enable = 0;
        else
            avc_state->weighted_ref_l1_enable = 1;
    } else {
        surface_id = slice_param->RefPicList0[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            avc_state->weighted_ref_l0_enable = 0;
        else
            avc_state->weighted_ref_l0_enable = 1;
    }
    /* NOTE(review): this fallback only triggers when obj_surface is
     * NULL; a non-NULL surface with NULL private_data is still bound
     * below even though the enable flag was cleared -- confirm that is
     * intended. */
    if (!obj_surface)
        obj_surface = encode_state->reference_objects[0];


    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);

    /* Output: the pre-allocated scaled picture for this list. */
    obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
}
4591
4592
4593 static VAStatus
4594 gen9_avc_kernel_wp(VADriverContextP ctx,
4595                    struct encode_state *encode_state,
4596                    struct intel_encoder_context *encoder_context,
4597                    unsigned int list1_in_use)
4598 {
4599     struct i965_driver_data *i965 = i965_driver_data(ctx);
4600     struct i965_gpe_table *gpe = &i965->gpe_table;
4601     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4602     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4603     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4604     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4605
4606     struct i965_gpe_context *gpe_context;
4607     struct gpe_media_object_walker_parameter media_object_walker_param;
4608     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4609     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4610     struct wp_param param;
4611
4612     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4613
4614     gpe->context_init(ctx, gpe_context);
4615     gpe->reset_binding_table(ctx, gpe_context);
4616
4617     memset(&param, 0, sizeof(param));
4618     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4619     /*set curbe*/
4620     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
4621
4622     /*send surface*/
4623     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4624
4625     gpe->setup_interface_data(ctx, gpe_context);
4626
4627     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4628     /* the scaling is based on 8x8 blk level */
4629     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4630     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4631     kernel_walker_param.no_dependency = 1;
4632
4633     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4634
4635     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4636                                             gpe_context,
4637                                             media_function,
4638                                             &media_object_walker_param);
4639
4640     return VA_STATUS_SUCCESS;
4641 }
4642
4643
4644 /*
4645 sfd related function
4646 */
/*
 * Fill the CURBE for the static-frame-detection (SFD) kernel.
 *
 * Programs the per-frame thresholds used to classify the frame as
 * static (large-MV, zero-MV and minimum-distortion limits) plus the
 * per-QP intra cost scaling table for P or B frames.
 */
static void
gen9_avc_set_curbe_sfd(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct i965_gpe_context *gpe_context,
                       struct intel_encoder_context *encoder_context,
                       void * param)
{
    gen9_avc_sfd_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;
    memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));

    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
    cmd->dw0.stream_in_type = 7 ;
    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
    /* Legacy (non-VDEnc) path. */
    cmd->dw0.vdenc_mode_disable = 1 ;

    cmd->dw1.hme_stream_in_ref_cost = 5 ;
    /* NOTE(review): this stores the "active minus 1" count directly as
     * num_of_refs -- confirm the kernel expects the minus-1 value. */
    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;

    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;

    /* Static-frame classification thresholds (totals are a percentage
     * of the MB count). */
    cmd->dw3.large_mv_threshold = 128 ;
    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
    cmd->dw5.zmv_threshold = 4 ;
    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
    cmd->dw7.min_dist_threshold = 10 ;

    /* Per-QP intra cost table; P and B frames use different tables. */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));

    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
    }

    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
    /* Binding-table slots for the kernel's input/output surfaces. */
    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;

    i965_gpe_context_unmap_curbe(gpe_context);

}
4704
4705 static void
4706 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4707                           struct encode_state *encode_state,
4708                           struct i965_gpe_context *gpe_context,
4709                           struct intel_encoder_context *encoder_context,
4710                           void * param)
4711 {
4712     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4713     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4714     struct i965_gpe_resource *gpe_resource;
4715     int size = 0;
4716
4717     /*HME mv data surface memv output 4x*/
4718     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4719     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4720                                    gpe_resource,
4721                                    1,
4722                                    I965_SURFACEFORMAT_R8_UNORM,
4723                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4724
4725     /* memv distortion */
4726     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4727     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4728                                    gpe_resource,
4729                                    1,
4730                                    I965_SURFACEFORMAT_R8_UNORM,
4731                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4732     /*buffer output*/
4733     size = 32 * 4 * 4;
4734     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4735     gen9_add_buffer_gpe_surface(ctx,
4736                                 gpe_context,
4737                                 gpe_resource,
4738                                 0,
4739                                 size / 4,
4740                                 0,
4741                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4742
4743 }
4744
4745 static VAStatus
4746 gen9_avc_kernel_sfd(VADriverContextP ctx,
4747                     struct encode_state *encode_state,
4748                     struct intel_encoder_context *encoder_context)
4749 {
4750     struct i965_driver_data *i965 = i965_driver_data(ctx);
4751     struct i965_gpe_table *gpe = &i965->gpe_table;
4752     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4753     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4754     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4755
4756     struct i965_gpe_context *gpe_context;
4757     struct gpe_media_object_parameter media_object_param;
4758     struct gpe_media_object_inline_data media_object_inline_data;
4759     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4760     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4761
4762     gpe->context_init(ctx, gpe_context);
4763     gpe->reset_binding_table(ctx, gpe_context);
4764
4765     /*set curbe*/
4766     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4767
4768     /*send surface*/
4769     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4770
4771     gpe->setup_interface_data(ctx, gpe_context);
4772
4773     memset(&media_object_param, 0, sizeof(media_object_param));
4774     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4775     media_object_param.pinline_data = &media_object_inline_data;
4776     media_object_param.inline_size = sizeof(media_object_inline_data);
4777
4778     gen9_avc_run_kernel_media_object(ctx, encoder_context,
4779                                      gpe_context,
4780                                      media_function,
4781                                      &media_object_param);
4782
4783     return VA_STATUS_SUCCESS;
4784 }
4785
4786 /*
4787 kernel related function:init/destroy etc
4788 */
4789 static void
4790 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4791                              struct generic_encoder_context *generic_context,
4792                              struct gen_avc_scaling_context *kernel_context)
4793 {
4794     struct i965_driver_data *i965 = i965_driver_data(ctx);
4795     struct i965_gpe_table *gpe = &i965->gpe_table;
4796     struct i965_gpe_context *gpe_context = NULL;
4797     struct encoder_kernel_parameter kernel_param ;
4798     struct encoder_scoreboard_parameter scoreboard_param;
4799     struct i965_kernel common_kernel;
4800
4801     if (IS_SKL(i965->intel.device_info) ||
4802         IS_BXT(i965->intel.device_info)) {
4803         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4804         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4805     } else if (IS_KBL(i965->intel.device_info) ||
4806                IS_GLK(i965->intel.device_info)) {
4807         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4808         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4809     }
4810
4811     /* 4x scaling kernel*/
4812     kernel_param.sampler_size = 0;
4813
4814     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4815     scoreboard_param.mask = 0xFF;
4816     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4817     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4818     scoreboard_param.walkpat_flag = 0;
4819
4820     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4821     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4822     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4823
4824     memset(&common_kernel, 0, sizeof(common_kernel));
4825
4826     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4827                                          generic_context->enc_kernel_size,
4828                                          INTEL_GENERIC_ENC_SCALING4X,
4829                                          0,
4830                                          &common_kernel);
4831
4832     gpe->load_kernels(ctx,
4833                       gpe_context,
4834                       &common_kernel,
4835                       1);
4836
4837     /*2x scaling kernel*/
4838     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4839     kernel_param.inline_data_size = 0;
4840     kernel_param.sampler_size = 0;
4841
4842     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4843     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4844     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4845
4846     memset(&common_kernel, 0, sizeof(common_kernel));
4847
4848     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4849                                          generic_context->enc_kernel_size,
4850                                          INTEL_GENERIC_ENC_SCALING2X,
4851                                          0,
4852                                          &common_kernel);
4853
4854     gpe->load_kernels(ctx,
4855                       gpe_context,
4856                       &common_kernel,
4857                       1);
4858
4859 }
4860
4861 static void
4862 gen9_avc_kernel_init_me(VADriverContextP ctx,
4863                         struct generic_encoder_context *generic_context,
4864                         struct gen_avc_me_context *kernel_context)
4865 {
4866     struct i965_driver_data *i965 = i965_driver_data(ctx);
4867     struct i965_gpe_table *gpe = &i965->gpe_table;
4868     struct i965_gpe_context *gpe_context = NULL;
4869     struct encoder_kernel_parameter kernel_param ;
4870     struct encoder_scoreboard_parameter scoreboard_param;
4871     struct i965_kernel common_kernel;
4872     int i = 0;
4873
4874     kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4875     kernel_param.inline_data_size = 0;
4876     kernel_param.sampler_size = 0;
4877
4878     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4879     scoreboard_param.mask = 0xFF;
4880     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4881     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4882     scoreboard_param.walkpat_flag = 0;
4883
4884     for (i = 0; i < 2; i++) {
4885         gpe_context = &kernel_context->gpe_contexts[i];
4886         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4887         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4888
4889         memset(&common_kernel, 0, sizeof(common_kernel));
4890
4891         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4892                                              generic_context->enc_kernel_size,
4893                                              INTEL_GENERIC_ENC_ME,
4894                                              i,
4895                                              &common_kernel);
4896
4897         gpe->load_kernels(ctx,
4898                           gpe_context,
4899                           &common_kernel,
4900                           1);
4901     }
4902
4903 }
4904
4905 static void
4906 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4907                            struct generic_encoder_context *generic_context,
4908                            struct gen_avc_mbenc_context *kernel_context)
4909 {
4910     struct i965_driver_data *i965 = i965_driver_data(ctx);
4911     struct i965_gpe_table *gpe = &i965->gpe_table;
4912     struct i965_gpe_context *gpe_context = NULL;
4913     struct encoder_kernel_parameter kernel_param ;
4914     struct encoder_scoreboard_parameter scoreboard_param;
4915     struct i965_kernel common_kernel;
4916     int i = 0;
4917     unsigned int curbe_size = 0;
4918
4919     if (IS_SKL(i965->intel.device_info) ||
4920         IS_BXT(i965->intel.device_info)) {
4921         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4922     } else if (IS_KBL(i965->intel.device_info) ||
4923                IS_GLK(i965->intel.device_info)) {
4924         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4925     }
4926
4927     assert(curbe_size > 0);
4928     kernel_param.curbe_size = curbe_size;
4929     kernel_param.inline_data_size = 0;
4930     kernel_param.sampler_size = 0;
4931
4932     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4933     scoreboard_param.mask = 0xFF;
4934     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4935     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4936     scoreboard_param.walkpat_flag = 0;
4937
4938     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4939         gpe_context = &kernel_context->gpe_contexts[i];
4940         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4941         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4942
4943         memset(&common_kernel, 0, sizeof(common_kernel));
4944
4945         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4946                                              generic_context->enc_kernel_size,
4947                                              INTEL_GENERIC_ENC_MBENC,
4948                                              i,
4949                                              &common_kernel);
4950
4951         gpe->load_kernels(ctx,
4952                           gpe_context,
4953                           &common_kernel,
4954                           1);
4955     }
4956
4957 }
4958
4959 static void
4960 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4961                          struct generic_encoder_context *generic_context,
4962                          struct gen_avc_brc_context *kernel_context)
4963 {
4964     struct i965_driver_data *i965 = i965_driver_data(ctx);
4965     struct i965_gpe_table *gpe = &i965->gpe_table;
4966     struct i965_gpe_context *gpe_context = NULL;
4967     struct encoder_kernel_parameter kernel_param ;
4968     struct encoder_scoreboard_parameter scoreboard_param;
4969     struct i965_kernel common_kernel;
4970     int i = 0;
4971
4972     static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4973         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4974         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4975         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4976         (sizeof(gen9_avc_mbenc_curbe_data)),
4977         0,
4978         (sizeof(gen9_avc_mb_brc_curbe_data))
4979     };
4980
4981     kernel_param.inline_data_size = 0;
4982     kernel_param.sampler_size = 0;
4983
4984     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4985     scoreboard_param.mask = 0xFF;
4986     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4987     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4988     scoreboard_param.walkpat_flag = 0;
4989
4990     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4991         kernel_param.curbe_size = brc_curbe_size[i];
4992         gpe_context = &kernel_context->gpe_contexts[i];
4993         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4994         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4995
4996         memset(&common_kernel, 0, sizeof(common_kernel));
4997
4998         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4999                                              generic_context->enc_kernel_size,
5000                                              INTEL_GENERIC_ENC_BRC,
5001                                              i,
5002                                              &common_kernel);
5003
5004         gpe->load_kernels(ctx,
5005                           gpe_context,
5006                           &common_kernel,
5007                           1);
5008     }
5009
5010 }
5011
5012 static void
5013 gen9_avc_kernel_init_wp(VADriverContextP ctx,
5014                         struct generic_encoder_context *generic_context,
5015                         struct gen_avc_wp_context *kernel_context)
5016 {
5017     struct i965_driver_data *i965 = i965_driver_data(ctx);
5018     struct i965_gpe_table *gpe = &i965->gpe_table;
5019     struct i965_gpe_context *gpe_context = NULL;
5020     struct encoder_kernel_parameter kernel_param ;
5021     struct encoder_scoreboard_parameter scoreboard_param;
5022     struct i965_kernel common_kernel;
5023
5024     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
5025     kernel_param.inline_data_size = 0;
5026     kernel_param.sampler_size = 0;
5027
5028     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5029     scoreboard_param.mask = 0xFF;
5030     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5031     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5032     scoreboard_param.walkpat_flag = 0;
5033
5034     gpe_context = &kernel_context->gpe_contexts;
5035     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5036     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5037
5038     memset(&common_kernel, 0, sizeof(common_kernel));
5039
5040     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5041                                          generic_context->enc_kernel_size,
5042                                          INTEL_GENERIC_ENC_WP,
5043                                          0,
5044                                          &common_kernel);
5045
5046     gpe->load_kernels(ctx,
5047                       gpe_context,
5048                       &common_kernel,
5049                       1);
5050
5051 }
5052
5053 static void
5054 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5055                          struct generic_encoder_context *generic_context,
5056                          struct gen_avc_sfd_context *kernel_context)
5057 {
5058     struct i965_driver_data *i965 = i965_driver_data(ctx);
5059     struct i965_gpe_table *gpe = &i965->gpe_table;
5060     struct i965_gpe_context *gpe_context = NULL;
5061     struct encoder_kernel_parameter kernel_param ;
5062     struct encoder_scoreboard_parameter scoreboard_param;
5063     struct i965_kernel common_kernel;
5064
5065     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5066     kernel_param.inline_data_size = 0;
5067     kernel_param.sampler_size = 0;
5068
5069     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5070     scoreboard_param.mask = 0xFF;
5071     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5072     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5073     scoreboard_param.walkpat_flag = 0;
5074
5075     gpe_context = &kernel_context->gpe_contexts;
5076     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5077     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5078
5079     memset(&common_kernel, 0, sizeof(common_kernel));
5080
5081     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5082                                          generic_context->enc_kernel_size,
5083                                          INTEL_GENERIC_ENC_SFD,
5084                                          0,
5085                                          &common_kernel);
5086
5087     gpe->load_kernels(ctx,
5088                       gpe_context,
5089                       &common_kernel,
5090                       1);
5091
5092 }
5093
/* Tear down all VME kernel state: free the shared encoder resources,
 * then destroy every GPE context created by the init_* functions above
 * (scaling, BRC, ME, MBEnc, WP and SFD). */
static void
gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
{

    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;

    int i = 0;

    /* Release surfaces/buffers before destroying the GPE contexts. */
    gen9_avc_free_resources(vme_context);

    for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
        gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);

    for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
        gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);

    for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
        gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);

    for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
        gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);

    /* WP and SFD each own a single GPE context. */
    gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);

    gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);

}
5123
5124 /*
5125 vme pipeline
5126 */
5127 static void
5128 gen9_avc_update_parameters(VADriverContextP ctx,
5129                            VAProfile profile,
5130                            struct encode_state *encode_state,
5131                            struct intel_encoder_context *encoder_context)
5132 {
5133     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5134     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5135     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5136     VAEncSequenceParameterBufferH264 *seq_param;
5137     VAEncSliceParameterBufferH264 *slice_param;
5138     int i, j, slice_index;
5139     unsigned int preset = generic_state->preset;
5140
5141     /* seq/pic/slice parameter setting */
5142     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5143     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5144
5145     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5146     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
5147
5148     avc_state->slice_num = 0;
5149     slice_index = 0;
5150     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
5151         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5152         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5153             avc_state->slice_param[slice_index] = slice_param;
5154             slice_param++;
5155             slice_index++;
5156             avc_state->slice_num++;
5157         }
5158     }
5159
5160     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5161     seq_param = avc_state->seq_param;
5162     slice_param = avc_state->slice_param[0];
5163
5164     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5165
5166     if (slice_param->slice_type == SLICE_TYPE_I ||
5167         slice_param->slice_type == SLICE_TYPE_SI)
5168         generic_state->frame_type = SLICE_TYPE_I;
5169     else if (slice_param->slice_type == SLICE_TYPE_P)
5170         generic_state->frame_type = SLICE_TYPE_P;
5171     else if (slice_param->slice_type == SLICE_TYPE_B)
5172         generic_state->frame_type = SLICE_TYPE_B;
5173     if (profile == VAProfileH264High)
5174         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
5175     else
5176         avc_state->transform_8x8_mode_enable = 0;
5177
5178     /* rc init*/
5179     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5180         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5181         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5182         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5183         generic_state->frames_per_100s = 3000; /* 30fps */
5184     }
5185
5186     generic_state->gop_size = seq_param->intra_period;
5187     generic_state->gop_ref_distance = seq_param->ip_period;
5188
5189     if (generic_state->internal_rate_mode == VA_RC_CBR) {
5190         generic_state->max_bit_rate = generic_state->target_bit_rate;
5191         generic_state->min_bit_rate = generic_state->target_bit_rate;
5192     }
5193
5194     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
5195         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5196     }
5197
5198     generic_state->preset = encoder_context->quality_level;
5199     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
5200         generic_state->preset = INTEL_PRESET_RT_SPEED;
5201     }
5202     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5203
5204     if (!generic_state->brc_inited) {
5205         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5206         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5207         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5208         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5209     }
5210
5211
5212     generic_state->curr_pak_pass = 0;
5213     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5214
5215     if (generic_state->internal_rate_mode == VA_RC_CBR ||
5216         generic_state->internal_rate_mode == VA_RC_VBR)
5217         generic_state->brc_enabled = 1;
5218     else
5219         generic_state->brc_enabled = 0;
5220
5221     if (generic_state->brc_enabled &&
5222         (!generic_state->init_vbv_buffer_fullness_in_bit ||
5223          !generic_state->vbv_buffer_size_in_bit ||
5224          !generic_state->max_bit_rate ||
5225          !generic_state->target_bit_rate ||
5226          !generic_state->frames_per_100s)) {
5227         WARN_ONCE("Rate control parameter is required for BRC\n");
5228         generic_state->brc_enabled = 0;
5229     }
5230
5231     if (!generic_state->brc_enabled) {
5232         generic_state->target_bit_rate = 0;
5233         generic_state->max_bit_rate = 0;
5234         generic_state->min_bit_rate = 0;
5235         generic_state->init_vbv_buffer_fullness_in_bit = 0;
5236         generic_state->vbv_buffer_size_in_bit = 0;
5237         generic_state->num_pak_passes = 1;
5238     } else {
5239         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5240     }
5241
5242
5243     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5244     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5245     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5246     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5247
5248     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
5249     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
5250     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
5251     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
5252
5253     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
5254     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
5255     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
5256     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
5257
5258     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
5259     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
5260     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
5261     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
5262
5263     if (generic_state->hme_supported) {
5264         generic_state->hme_enabled = 1;
5265     } else {
5266         generic_state->hme_enabled = 0;
5267     }
5268
5269     if (generic_state->b16xme_supported) {
5270         generic_state->b16xme_enabled = 1;
5271     } else {
5272         generic_state->b16xme_enabled = 0;
5273     }
5274
5275     if (generic_state->b32xme_supported) {
5276         generic_state->b32xme_enabled = 1;
5277     } else {
5278         generic_state->b32xme_enabled = 0;
5279     }
5280     /* disable HME/16xME if the size is too small */
5281     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5282         generic_state->b32xme_supported = 0;
5283         generic_state->b32xme_enabled = 0;
5284         generic_state->b16xme_supported = 0;
5285         generic_state->b16xme_enabled = 0;
5286         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5287         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5288     }
5289     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5290         generic_state->b32xme_supported = 0;
5291         generic_state->b32xme_enabled = 0;
5292         generic_state->b16xme_supported = 0;
5293         generic_state->b16xme_enabled = 0;
5294         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5295         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5296     }
5297
5298     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5299         generic_state->b32xme_supported = 0;
5300         generic_state->b32xme_enabled = 0;
5301         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5302         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5303     }
5304     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5305         generic_state->b32xme_supported = 0;
5306         generic_state->b32xme_enabled = 0;
5307         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5308         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5309     }
5310
5311     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5312         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5313         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5314     }
5315     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5316         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5317         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5318     }
5319
5320 }
5321
5322 static VAStatus
5323 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5324                                 struct encode_state *encode_state,
5325                                 struct intel_encoder_context *encoder_context)
5326 {
5327     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5328     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5329     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5330     unsigned int rate_control_mode = encoder_context->rate_control_mode;
5331     unsigned int preset = generic_state->preset;
5332     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5333     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5334     int i = 0;
5335     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5336     /*avbr init*/
5337     generic_state->avbr_curracy = 30;
5338     generic_state->avbr_convergence = 150;
5339
5340     switch (rate_control_mode & 0x7f) {
5341     case VA_RC_CBR:
5342         generic_state->internal_rate_mode = VA_RC_CBR;
5343         break;
5344
5345     case VA_RC_VBR:
5346         generic_state->internal_rate_mode = VA_RC_VBR;
5347         break;
5348
5349     case VA_RC_CQP:
5350     default:
5351         generic_state->internal_rate_mode = VA_RC_CQP;
5352         break;
5353     }
5354
5355     if (rate_control_mode != VA_RC_NONE &&
5356         rate_control_mode != VA_RC_CQP) {
5357         generic_state->brc_enabled = 1;
5358         generic_state->brc_distortion_buffer_supported = 1;
5359         generic_state->brc_constant_buffer_supported = 1;
5360         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5361     }
5362
5363     /*check brc parameter*/
5364     if (generic_state->brc_enabled) {
5365         avc_state->mb_qp_data_enable = 0;
5366     }
5367
5368     /*set the brc init and reset accordingly*/
5369     if (generic_state->brc_need_reset &&
5370         (generic_state->brc_distortion_buffer_supported == 0 ||
5371          rate_control_mode == VA_RC_CQP)) {
5372         generic_state->brc_need_reset = 0;// not support by CQP
5373     }
5374
5375     if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
5376         avc_state->sfd_enable = 0;
5377     }
5378
5379     if (generic_state->frames_per_window_size == 0) {
5380         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5381     } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
5382         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5383     }
5384
5385     if (generic_state->brc_enabled) {
5386         generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5387         if (avc_state->min_max_qp_enable) {
5388             generic_state->num_pak_passes = 1;
5389         }
5390         generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5391         generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5392     } else {
5393         generic_state->num_pak_passes = 1;// CQP only one pass
5394     }
5395
5396     avc_state->mbenc_i_frame_dist_in_use = 0;
5397     avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5398
5399     /*ROI must enable mbbrc.*/
5400
5401     /*CAD check*/
5402     if (avc_state->caf_supported) {
5403         switch (generic_state->frame_type) {
5404         case SLICE_TYPE_I:
5405             break;
5406         case SLICE_TYPE_P:
5407             avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5408             break;
5409         case SLICE_TYPE_B:
5410             avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5411             break;
5412         }
5413
5414         if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
5415             if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5416                 avc_state->caf_enable = 0;
5417         }
5418     }
5419
5420     avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
5421
5422     /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5423     if (avc_state->flatness_check_supported) {
5424         avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5425     } else {
5426         avc_state->flatness_check_enable = 0;
5427     }
5428
5429     /* check mb_status_supported/enbale*/
5430     if (avc_state->adaptive_transform_decision_enable) {
5431         avc_state->mb_status_enable = 1;
5432     } else {
5433         avc_state->mb_status_enable = 0;
5434     }
5435     /*slice check,all the slices use the same slice height except the last slice*/
5436     avc_state->arbitrary_num_mbs_in_slice = 0;
5437     for (i = 0; i < avc_state->slice_num; i++) {
5438         if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
5439             avc_state->arbitrary_num_mbs_in_slice = 1;
5440             avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
5441         } else {
5442             avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
5443         }
5444     }
5445
5446     if (generic_state->frame_type == SLICE_TYPE_I) {
5447         generic_state->hme_enabled = 0;
5448         generic_state->b16xme_enabled = 0;
5449         generic_state->b32xme_enabled = 0;
5450     }
5451
5452     if (generic_state->frame_type == SLICE_TYPE_B) {
5453         gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
5454         avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
5455     }
5456
5457     /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5458     avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5459                                              && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5460
5461     if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
5462         avc_state->tq_enable = 1;
5463         avc_state->tq_rounding = 6;
5464         if (generic_state->brc_enabled) {
5465             generic_state->mb_brc_enabled = 1;
5466         }
5467     }
5468
5469     //check the inter rounding
5470     avc_state->rounding_value = 0;
5471     avc_state->rounding_inter_p = 255;//default
5472     avc_state->rounding_inter_b = 255; //default
5473     avc_state->rounding_inter_b_ref = 255; //default
5474
5475     if (generic_state->frame_type == SLICE_TYPE_P) {
5476         if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
5477             if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
5478                 if (generic_state->gop_ref_distance == 1)
5479                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5480                 else
5481                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5482             } else {
5483                 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5484             }
5485
5486         } else {
5487             avc_state->rounding_value = avc_state->rounding_inter_p;
5488         }
5489     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5490         if (pic_param->pic_fields.bits.reference_pic_flag) {
5491             if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5492                 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5493             else
5494                 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5495         } else {
5496             if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
5497                 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5498                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5499                 else
5500                     avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5501             } else {
5502                 avc_state->rounding_value = avc_state->rounding_inter_b;
5503             }
5504         }
5505     }
5506     return VA_STATUS_SUCCESS;
5507 }
5508
/*
 * Bind all per-frame resources for the VME (GPE kernel) stage:
 * the reconstructed surface, the raw input surface, the reference
 * surfaces with their direct-MV buffers, the coded (bitstream) buffer
 * plus its status segment, and the L0/L1 reference index mapping used
 * by the MbEnc kernels.
 *
 * Returns VA_STATUS_SUCCESS, or the error from surface allocation /
 * validation, or VA_STATUS_ERROR_INVALID_VALUE for oversized ref lists.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstructed frame: ensure its BO exists as NV12 */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* Allocate/validate the private per-surface AVC data (DMV buffers etc.) */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the members of avc_priv_surface: frame_store_id, qp_value, ... */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        /* The last two DMV slots always track the *current* frame's
         * top/bottom MV buffers; rebind them every frame. */
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        /* Bottom field POC is tracked as top-field POC + 1 */
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Input (uncompressed) YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each valid reference and its DMV pair;
     * the list is contiguous, so stop at the first empty slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream (coded) buffer; payload starts after the driver's
     * coded-buffer header and the tail 0x1000 bytes are reserved. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /* Status buffer lives in the same BO as the coded buffer */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* Clear the status region so stale results are never reported */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    /* Active reference counts per list; slice-level override wins over
     * the picture-level values when the override flag is set. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its index in reference_objects[]
     * (the frame store id); unmatched entries stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames) */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
5705
5706 static VAStatus
5707 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5708                              struct encode_state *encode_state,
5709                              struct intel_encoder_context *encoder_context)
5710 {
5711     return VA_STATUS_SUCCESS;
5712 }
5713
5714 static VAStatus
5715 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5716                               struct encode_state *encode_state,
5717                               struct intel_encoder_context *encoder_context)
5718 {
5719
5720     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5721     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5722     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5723
5724     /*set this flag when all kernel is finished*/
5725     if (generic_state->brc_enabled) {
5726         generic_state->brc_inited = 1;
5727         generic_state->brc_need_reset = 0;
5728         avc_state->mbenc_curbe_set_in_brc_update = 0;
5729     }
5730     return VA_STATUS_SUCCESS;
5731 }
5732
/*
 * Submit every VME GPE kernel for the current frame. The dispatch order
 * matters: BRC init/reset must precede HME (it resets the BRC distortion
 * surface), scaling feeds HME, HME feeds SFD, and BRC frame/MB update
 * must run before the final MbEnc pass.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the BRC distortion surface */
    if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /* Down-scaling: 4x always when HME is supported, then 16x/32x pyramids */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /* ME kernels, coarsest level first so each level can seed the next */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /* Static frame detection kernel runs after HME in the same command buffer */
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase */
    if (generic_state->brc_enabled) {
        /* Optional MbEnc pre-pass in I-frame distortion mode to feed BRC */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /* Weighted prediction: run the WP kernel per list when the slice
     * carries luma weights; otherwise fall back by clearing the picture
     * flags (the app should really have kept them consistent). */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /* Final MbEnc kernel (non-distortion mode) */
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /* The reset-vertical-line kernel is intentionally not used here */

    return VA_STATUS_SUCCESS;
}
5823
5824 static VAStatus
5825 gen9_avc_vme_pipeline(VADriverContextP ctx,
5826                       VAProfile profile,
5827                       struct encode_state *encode_state,
5828                       struct intel_encoder_context *encoder_context)
5829 {
5830     VAStatus va_status;
5831
5832     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5833
5834     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5835     if (va_status != VA_STATUS_SUCCESS)
5836         return va_status;
5837
5838     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5839     if (va_status != VA_STATUS_SUCCESS)
5840         return va_status;
5841
5842     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5843     if (va_status != VA_STATUS_SUCCESS)
5844         return va_status;
5845
5846     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5847     if (va_status != VA_STATUS_SUCCESS)
5848         return va_status;
5849
5850     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5851     if (va_status != VA_STATUS_SUCCESS)
5852         return va_status;
5853
5854     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5855
5856     return VA_STATUS_SUCCESS;
5857 }
5858
5859 static void
5860 gen9_avc_vme_context_destroy(void * context)
5861 {
5862     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5863     struct generic_encoder_context *generic_ctx;
5864     struct i965_avc_encoder_context *avc_ctx;
5865     struct generic_enc_codec_state *generic_state;
5866     struct avc_enc_state *avc_state;
5867
5868     if (!vme_context)
5869         return;
5870
5871     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5872     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5873     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5874     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5875
5876     gen9_avc_kernel_destroy(vme_context);
5877
5878     free(generic_ctx);
5879     free(avc_ctx);
5880     free(generic_state);
5881     free(avc_state);
5882     free(vme_context);
5883     return;
5884
5885 }
5886
5887 static void
5888 gen9_avc_kernel_init(VADriverContextP ctx,
5889                      struct intel_encoder_context *encoder_context)
5890 {
5891     struct i965_driver_data *i965 = i965_driver_data(ctx);
5892     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5893     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5894     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5895
5896     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
5897     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
5898     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
5899     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
5900     gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
5901     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
5902
5903     //function pointer
5904     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5905     generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5906     generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5907     generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5908     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5909     generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5910     generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5911     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5912     generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
5913
5914     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5915     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5916     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5917     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5918     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5919     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5920     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5921     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
5922
5923     if (IS_SKL(i965->intel.device_info) ||
5924         IS_BXT(i965->intel.device_info))
5925         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5926     else if (IS_KBL(i965->intel.device_info) ||
5927              IS_GLK(i965->intel.device_info))
5928         generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5929
5930 }
5931
5932 /*
5933 PAK pipeline related function
5934 */
5935 extern int
5936 intel_avc_enc_slice_type_fixup(int slice_type);
5937
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs) to put the MFX engine into AVC
 * encoding mode for the PAK stage.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable: on for every PAK pass except the last */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved/pic status fields, left zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
5978
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing one NV12 surface (input,
 * reconstructed, or reference) identified by @id for the PAK stage.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as Cb for interleaved NV12 — NOTE(review): confirm against the PRM */

    ADVANCE_BCS_BATCH(batch);
}
6010
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs): the deblocking outputs, the
 * raw input surface, the PAK status/stream-out buffers, the row-store
 * scratch buffers, and the reference picture addresses.
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture (read-only source) */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) — MB status stream-out */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter row-store scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 DWs per entry) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes (MOCS for the whole list) */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) — same MB status buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer (unused on encoder) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer (unused on encoder) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the scaled-reference buffer (unused) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6068
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): the indirect MV-data
 * input (written earlier by the VME kernels) and the PAK-BSE output
 * region inside the compressed bitstream buffer.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    /* The MV data lives in the reconstructed surface's private data;
     * bail out silently if it is missing. */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset (unused here) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    size = w_mb * h_mb * 32 * 4;    /* 32 MV entries of 4 bytes per MB */
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
6133
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): scratch buffers for the
 * bitstream-processing part of the MFX pipe.  Only the BSD/MPC row store
 * is needed for encoding; the MPR row store and bitplane buffers are
 * decoder-only and left NULL.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6157
/* Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-mode MV buffers for
 * the reference frames and the current frame, followed by the POC list the
 * hardware uses for temporal direct-mode scaling.
 *
 * DW accounting: 1 header + 32 (16 reference DMV address pairs) + 1 MOCS +
 * 2 (current-frame DMV address) + 1 MOCS + 34 POC entries = 71, which
 * implies NUM_MFC_AVC_DMV_BUFFERS is 34 (32 reference slots + 2 for the
 * current frame).
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference.
     * Buffers are stored in pairs (top/bottom field share an allocation),
     * hence the stride-2 loop; missing references emit a NULL 64-bit address.
     */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: MOCS for the reference DMV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by PAK, hence
     * the write domain on the relocation)
     */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: 32 reference POCs followed by the two current-frame entries */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
6205
6206 static void
6207 gen9_mfc_qm_state(VADriverContextP ctx,
6208                   int qm_type,
6209                   const unsigned int *qm,
6210                   int qm_length,
6211                   struct intel_encoder_context *encoder_context)
6212 {
6213     struct intel_batchbuffer *batch = encoder_context->base.batch;
6214     unsigned int qm_buffer[16];
6215
6216     assert(qm_length <= 16);
6217     assert(sizeof(*qm) == 4);
6218     memset(qm_buffer, 0, 16 * 4);
6219     memcpy(qm_buffer, qm, qm_length * 4);
6220
6221     BEGIN_BCS_BATCH(batch, 18);
6222     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6223     OUT_BCS_BATCH(batch, qm_type << 0);
6224     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6225     ADVANCE_BCS_BATCH(batch);
6226 }
6227
6228 static void
6229 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6230                       struct encode_state *encode_state,
6231                       struct intel_encoder_context *encoder_context)
6232 {
6233     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6234     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6235     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6236     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6237
6238
6239     const unsigned int *qm_4x4_intra;
6240     const unsigned int *qm_4x4_inter;
6241     const unsigned int *qm_8x8_intra;
6242     const unsigned int *qm_8x8_inter;
6243
6244     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6245         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6246         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6247     } else {
6248         VAIQMatrixBufferH264 *qm;
6249         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6250         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6251         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6252         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6253         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6254         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6255     }
6256
6257     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6258     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6259     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6260     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
6261 }
6262
6263 static void
6264 gen9_mfc_fqm_state(VADriverContextP ctx,
6265                    int fqm_type,
6266                    const unsigned int *fqm,
6267                    int fqm_length,
6268                    struct intel_encoder_context *encoder_context)
6269 {
6270     struct intel_batchbuffer *batch = encoder_context->base.batch;
6271     unsigned int fqm_buffer[32];
6272
6273     assert(fqm_length <= 32);
6274     assert(sizeof(*fqm) == 4);
6275     memset(fqm_buffer, 0, 32 * 4);
6276     memcpy(fqm_buffer, fqm, fqm_length * 4);
6277
6278     BEGIN_BCS_BATCH(batch, 34);
6279     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6280     OUT_BCS_BATCH(batch, fqm_type << 0);
6281     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6282     ADVANCE_BCS_BATCH(batch);
6283 }
6284
/* Build a forward quantizer matrix: fqm is the transposed matrix of 16.16
 * fixed-point reciprocals of qm ((1 << 16) / qm, truncated to 16 bits).
 *
 * @qm:  input quantizer matrix, len*len bytes, row-major; entries must be
 *       non-zero per the H.264 scaling-list semantics
 * @fqm: output, len*len 16-bit entries, written transposed relative to qm
 * @len: matrix dimension (4 or 8 at the call sites)
 *
 * A zero qm entry is invalid input; the assert catches it in debug builds,
 * and the explicit guard below avoids division-by-zero UB when asserts are
 * compiled out (NDEBUG) -- such entries yield 0 instead of crashing.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int i, j;

    for (i = 0; i < len; i++) {
        for (j = 0; j < len; j++) {
            uint8_t q = qm[j * len + i];

            assert(q);
            fqm[i * len + j] = q ? (1 << 16) / q : 0;
        }
    }
}
6295
6296 static void
6297 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6298                        struct encode_state *encode_state,
6299                        struct intel_encoder_context *encoder_context)
6300 {
6301     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6302     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6303     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6304     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6305
6306     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6307         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6308         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6309         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6310         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6311         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6312     } else {
6313         int i;
6314         uint32_t fqm[32];
6315         VAIQMatrixBufferH264 *qm;
6316         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6317         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6318
6319         for (i = 0; i < 3; i++)
6320             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6321         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6322
6323         for (i = 3; i < 6; i++)
6324             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6325         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6326
6327         gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6328         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6329
6330         gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6331         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6332     }
6333 }
6334
/* Emit MFX_INSERT_OBJECT: pushes pre-built header/raw bits directly into
 * the output bitstream.
 *
 * @insert_data:          the payload DWs
 * @lenght_in_dws:        payload length in DWs (note: "lenght" typo is
 *                        historical; kept for consistency with callers' diffs)
 * @data_bits_in_last_dw: valid bits in the final DW; 0 means a full 32
 * @skip_emul_byte_count: leading bytes exempt from emulation-prevention
 * @is_last_header:       set on the final header before slice data
 * @is_end_of_slice:      set when this object terminates the slice
 * @emulation_flag:       ask hardware to insert emulation-prevention bytes
 * @slice_header_indicator: marks the payload as a slice header (DW1 bit 14)
 */
static void
gen9_mfc_avc_insert_object(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                           int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                           int slice_header_indicator,
                           struct intel_batchbuffer *batch)
{
    /* a value of 0 means the last DW is fully used */
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* always start at offset 0 */
                  (slice_header_indicator << 14) |
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* check this flag */
    intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
6362
/* Scan the packed raw-data headers attached to slice 0 and, if one of them
 * is an Access Unit Delimiter NAL, insert it into the bitstream before any
 * other header.  Only the first AUD found is emitted.
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* The NAL header byte sits just before the end of the start-code
         * region; its low 5 bits are the nal_unit_type.
         */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
6406
/* Insert the packed headers for one slice: first all raw-data headers
 * except AUD (already handled) and the slice header, then the slice header
 * itself, which must be last (is_last_header = 1, slice_header_indicator
 * = 1).  When the application supplied no packed slice header, the driver
 * builds one with build_avc_slice_header().
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* index 0 means "no packed slice header supplied" */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * also skip AUD NALs, which were emitted first by
         * gen9_mfc_avc_insert_aud_packed_data()
         */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header() allocated the buffer; this function owns
         * and releases it
         */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
6513
/* Insert all bitstream headers for one slice.  For the first slice this is
 * AUD (if supplied), then packed SPS, PPS and SEI; for every slice the
 * per-slice packed data and slice header follow.
 * (Function name "inset" is a historical typo for "insert"; kept because
 * callers reference it.)
 */
static void
gen9_mfc_avc_inset_headers(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           VAEncSliceParameterBufferH264 *slice_param,
                           int slice_index,
                           struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
    unsigned int internal_rate_mode = generic_state->internal_rate_mode;
    unsigned int skip_emul_byte_cnt;

    /* sequence-level headers are only emitted ahead of the first slice */
    if (slice_index == 0) {

        /* if AUD exist and insert it firstly */
        gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);

        /* packed SPS, when the application provided one */
        if (encode_state->packed_header_data[idx]) {
            VAEncPackedHeaderParameterBuffer *param = NULL;
            unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
            unsigned int length_in_bits;

            assert(encode_state->packed_header_param[idx]);
            param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
            length_in_bits = param->bit_length;

            skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
        }

        idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);

        /* packed PPS, when the application provided one */
        if (encode_state->packed_header_data[idx]) {
            VAEncPackedHeaderParameterBuffer *param = NULL;
            unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
            unsigned int length_in_bits;

            assert(encode_state->packed_header_param[idx]);
            param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
            length_in_bits = param->bit_length;

            skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
        }

        idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);

        /* packed SEI, when the application provided one */
        if (encode_state->packed_header_data[idx]) {
            VAEncPackedHeaderParameterBuffer *param = NULL;
            unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
            unsigned int length_in_bits;

            assert(encode_state->packed_header_param[idx]);
            param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
            length_in_bits = param->bit_length;

            skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
        } else if (internal_rate_mode == VA_RC_CBR) {
            /* insert others -- placeholder: CBR-generated SEI (e.g. buffering
             * period / picture timing) is not implemented here
             */
        }
    }

    gen9_mfc_avc_insert_slice_packed_data(ctx,
                                          encode_state,
                                          encoder_context,
                                          slice_index,
                                          batch);
}
6616
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, reference
 * counts, weighted prediction, QP/deblocking parameters, slice/next-slice
 * MB positions, rate-control flags and rounding control.
 *
 * next_slice_param == NULL marks the last slice of the frame; its "next"
 * position is then the first MB row past the bottom of the frame.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* convert the first MB address of this slice into (x, y) MB coordinates */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: "next" position is one MB row past the frame bottom */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        /* no inter prediction, hence no weight denominators */
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        /* slice-level override of the PPS reference count */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* QP clamps and RC grow/shrink/correct controls are left at zero;
     * BRC adjustments are applied elsewhere
     */
    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* keep accumulating RC counters only on re-encode (PAK pass > 0) */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* panic mode only on the final PAK pass of a BRC encode without
     * explicit min/max QP control
     */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: first MB of this slice and of the next slice */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of this slice's output in the bitstream buffer
     * (base programmed in MFX_IND_OBJ_BASE_ADDR_STATE)
     */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /* QP max delta (negative direction), 0 = unused */
                  (max_qp_p << 16) |     /* QP max delta (positive direction), 0 = unused */
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
6765
6766 static uint8_t
6767 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6768 {
6769     unsigned int is_long_term =
6770         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6771     unsigned int is_top_field =
6772         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6773     unsigned int is_bottom_field =
6774         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6775
6776     return ((is_long_term                         << 6) |
6777             (0 << 5) |
6778             (frame_store_id                       << 1) |
6779             ((is_top_field ^ 1) & is_bottom_field));
6780 }
6781
6782 static void
6783 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6784                            struct encode_state *encode_state,
6785                            struct intel_encoder_context *encoder_context,
6786                            VAEncSliceParameterBufferH264 *slice_param,
6787                            struct intel_batchbuffer *batch)
6788 {
6789     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6790     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6791     VAPictureH264 *ref_pic;
6792     int i, slice_type, ref_idx_shift;
6793     unsigned int fwd_ref_entry;
6794     unsigned int bwd_ref_entry;
6795
6796     /* max 4 ref frames are allowed for l0 and l1 */
6797     fwd_ref_entry = 0x80808080;
6798     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6799
6800     if ((slice_type == SLICE_TYPE_P) ||
6801         (slice_type == SLICE_TYPE_B)) {
6802         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6803             ref_pic = &slice_param->RefPicList0[i];
6804             ref_idx_shift = i * 8;
6805
6806             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6807             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6808         }
6809     }
6810
6811     bwd_ref_entry = 0x80808080;
6812     if (slice_type == SLICE_TYPE_B) {
6813         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6814             ref_pic = &slice_param->RefPicList1[i];
6815             ref_idx_shift = i * 8;
6816
6817             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6818             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
6819         }
6820     }
6821
6822     if ((slice_type == SLICE_TYPE_P) ||
6823         (slice_type == SLICE_TYPE_B)) {
6824         BEGIN_BCS_BATCH(batch, 10);
6825         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6826         OUT_BCS_BATCH(batch, 0);                        // L0
6827         OUT_BCS_BATCH(batch, fwd_ref_entry);
6828
6829         for (i = 0; i < 7; i++) {
6830             OUT_BCS_BATCH(batch, 0x80808080);
6831         }
6832
6833         ADVANCE_BCS_BATCH(batch);
6834     }
6835
6836     if (slice_type == SLICE_TYPE_B) {
6837         BEGIN_BCS_BATCH(batch, 10);
6838         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6839         OUT_BCS_BATCH(batch, 1);                  //Select L1
6840         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
6841         for (i = 0; i < 7; i++) {
6842             OUT_BCS_BATCH(batch, 0x80808080);
6843         }
6844         ADVANCE_BCS_BATCH(batch);
6845     }
6846 }
6847
6848 static void
6849 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6850                                 struct encode_state *encode_state,
6851                                 struct intel_encoder_context *encoder_context,
6852                                 VAEncPictureParameterBufferH264 *pic_param,
6853                                 VAEncSliceParameterBufferH264 *slice_param,
6854                                 struct intel_batchbuffer *batch)
6855 {
6856     int i, slice_type;
6857     short weightoffsets[32 * 6];
6858
6859     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6860
6861     if (slice_type == SLICE_TYPE_P &&
6862         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6863         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6864         for (i = 0; i < 32; i++) {
6865             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6866             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6867             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6868             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6869             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6870             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6871         }
6872
6873         BEGIN_BCS_BATCH(batch, 98);
6874         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6875         OUT_BCS_BATCH(batch, 0);
6876         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6877
6878         ADVANCE_BCS_BATCH(batch);
6879     }
6880
6881     if (slice_type == SLICE_TYPE_B &&
6882         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6883         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6884         for (i = 0; i < 32; i++) {
6885             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6886             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6887             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6888             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6889             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6890             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6891         }
6892
6893         BEGIN_BCS_BATCH(batch, 98);
6894         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6895         OUT_BCS_BATCH(batch, 0);
6896         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6897         ADVANCE_BCS_BATCH(batch);
6898
6899         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6900         for (i = 0; i < 32; i++) {
6901             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6902             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6903             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6904             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6905             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6906             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6907         }
6908
6909         BEGIN_BCS_BATCH(batch, 98);
6910         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6911         OUT_BCS_BATCH(batch, 1);
6912         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6913         ADVANCE_BCS_BATCH(batch);
6914     }
6915 }
6916
/*
 * Program the PAK pipeline for one slice.
 *
 * On the first PAK pass the per-slice commands (ref idx state, weight/offset
 * tables, slice state, packed headers) are recorded into a reusable
 * second-level batch buffer and their start offset is remembered in
 * avc_state->slice_batch_offset[slice_index]; later BRC re-encode passes
 * replay the recorded commands from that saved offset instead of rebuilding
 * them. Finally the slice's MB code (PAK objects produced by the VME stage)
 * is chained in as another second-level batch.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands into the 2nd-level batch
         * and remember where they start so later passes can replay them. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded sequence so the 2nd-level replay stops here. */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-encode pass: reuse the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* Insert the recorded slice commands as a second-level batch. */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* Insert the slice's MB code as a second-level batch; the MB code buffer
     * lives in the reconstructed surface's private data. */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* 16 dwords (64 bytes) per macroblock record — start at the slice's first MB. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
6991
/*
 * Slice-level PAK programming: walk every slice parameter buffer (and every
 * element within it) and emit the per-slice commands, passing each slice a
 * pointer to the following slice so slice-state can mark the last one.
 * In frame-level mode (single slice per frame) only the first slice is
 * processed. Ends with a video-pipeline-cache-invalidating MI_FLUSH_DW.
 */
static void
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
    int has_tail = 0;             /* check it later */

    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        /* Look ahead to the first slice of the next parameter buffer (or NULL
         * at the end) so the last element of this buffer has a successor. */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            /* Successor is the next element, or the next group's first slice. */
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_mfc_avc_single_slice(ctx,
                                      encode_state,
                                      encoder_context,
                                      slice_param,
                                      next_slice_param,
                                      slice_index);
            slice_param++;
            slice_index++;

            /* Frame-level mode: only the first slice is programmed. */
            if (is_frame_level)
                break;
        }

        if (is_frame_level)
            break;
    }

    if (has_tail) {
        /* insert a tail if required */
    }

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
7048 static void
7049 gen9_avc_pak_picture_level(VADriverContextP ctx,
7050                            struct encode_state *encode_state,
7051                            struct intel_encoder_context *encoder_context)
7052 {
7053     struct i965_driver_data *i965 = i965_driver_data(ctx);
7054     struct i965_gpe_table *gpe = &i965->gpe_table;
7055     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7056     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7057     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7058     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7059     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
7060     struct intel_batchbuffer *batch = encoder_context->base.batch;
7061
7062     if (generic_state->brc_enabled &&
7063         generic_state->curr_pak_pass) {
7064         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
7065         struct encoder_status_buffer_internal *status_buffer;
7066         status_buffer = &(avc_ctx->status_buffer);
7067
7068         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
7069         mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
7070         mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
7071         mi_conditional_batch_buffer_end_params.compare_data = 0;
7072         mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
7073         gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
7074     }
7075
7076     gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
7077     gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
7078     gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
7079     gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
7080     gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
7081     gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
7082
7083     if (generic_state->brc_enabled) {
7084         memset(&second_level_batch, 0, sizeof(second_level_batch));
7085         if (generic_state->curr_pak_pass == 0) {
7086             second_level_batch.offset = 0;
7087         } else {
7088             second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
7089         }
7090         second_level_batch.is_second_level = 1;
7091         second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
7092         gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7093     } else {
7094         /*generate a new image state */
7095         gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
7096         memset(&second_level_batch, 0, sizeof(second_level_batch));
7097         second_level_batch.offset = 0;
7098         second_level_batch.is_second_level = 1;
7099         second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
7100         gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7101     }
7102
7103     gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
7104     gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
7105     gen9_mfc_avc_directmode_state(ctx, encoder_context);
7106
7107 }
7108
/*
 * Snapshot MFX status registers after PAK: store the bitstream byte count
 * and image status mask into the internal status buffer, and mirror the
 * byte counts, completed pass count, and per-pass image status control into
 * the BRC pre-PAK statistics buffer that the BRC update kernel reads.
 * Bracketed by MI_FLUSH_DW so the register reads observe completed work.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    /* Flush before sampling the registers. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* Image status mask: also read back by the conditional-batch-buffer-end
     * check at the start of the next BRC pass. */
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* dword 0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 1: bitstream byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 2: number of PAK passes executed so far (current pass + 1) */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* dwords 4..: per-pass image status control register value */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* Flush so the stored values are visible before anything consumes them. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
7168
7169 static void
7170 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7171                          struct intel_encoder_context *encoder_context)
7172 {
7173     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7174     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7175     unsigned int rate_control_mode = encoder_context->rate_control_mode;
7176
7177     switch (rate_control_mode & 0x7f) {
7178     case VA_RC_CBR:
7179         generic_state->internal_rate_mode = VA_RC_CBR;
7180         break;
7181
7182     case VA_RC_VBR:
7183         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7184         break;
7185
7186     case VA_RC_CQP:
7187     default:
7188         generic_state->internal_rate_mode = VA_RC_CQP;
7189         break;
7190     }
7191
7192     if (encoder_context->quality_level == 0)
7193         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
7194 }
7195
/*
 * Prepare every surface and buffer the PAK stage needs for this frame:
 * decide whether deblocking is active, bind the reconstructed / input /
 * reference surfaces and their direct-MV buffers as GPE resources, rebuild
 * the second-level slice batch buffer, and (re)allocate the PAK scratch
 * buffers sized from the frame's macroblock dimensions.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED (or the
 * failing sub-call's status) on error.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    struct gen9_surface_avc *avc_priv_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;
    struct avc_surface_param surface_param;

    /* update the parameter and check slice parameter */
    /* Deblocking is considered enabled as soon as any slice does not fully
     * disable it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
        /* The last two DMV buffer slots are reserved for the current frame's
         * top/bottom direct-MV buffers; rebind them and refresh the POCs. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        /* QP of the first slice is used as the surface's representative QP. */
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    /* Bind the reconstructed surface; depending on deblocking it also serves
     * as the post- or pre-deblocking output. */
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Bind each reference surface and its top/bottom direct-MV buffers;
     * stops at the first empty slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* Rebuild the second-level slice batch buffer sized per slice-parameter
     * buffer. NOTE(review): 4096 bytes per slice_params_ext entry — assumes
     * the recorded per-slice commands fit; confirm for multi-element buffers. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* PAK scratch buffers, sized from the frame's MB dimensions. */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
7383
/*
 * Run the PAK (bitstream packing) stage for one picture.
 *
 * Prepares the PAK pipeline resources, then programs the BSD/video ring with
 * one PAK pass per configured BRC pass: picture-level state, slice-level
 * commands and an MFC status readback for each pass.  The whole command
 * sequence is emitted atomically into a single batch buffer and flushed once.
 *
 * Returns VA_STATUS_SUCCESS on success, or the error from the prepare step.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    /* (Re)allocate PAK surfaces/buffers and the 2nd-level slice batch buffer. */
    va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Route the commands to BSD ring 0 explicitly when a second BSD ring
     * exists; otherwise use the default BSD ring. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* One full PAK pass per BRC iteration; later passes refine QP based on
     * the status read back from the previous pass. */
    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* Initialize the avc Image Ctrl reg for the first pass: write 0
             * to the image status/control register so stale pass state from a
             * previous frame cannot leak into this one. */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        /* Capture bitstream byte count / image status for BRC and the
         * application-visible coded buffer segment. */
        gen9_avc_read_mfc_status(ctx, encoder_context);

    }

    /* The 2nd-level slice batch buffer is per-frame; release it now that all
     * passes referencing it have been emitted. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    /* Advance per-sequence / global frame counters for the next picture. */
    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
7443
7444 static VAStatus
7445 gen9_avc_pak_pipeline(VADriverContextP ctx,
7446                       VAProfile profile,
7447                       struct encode_state *encode_state,
7448                       struct intel_encoder_context *encoder_context)
7449 {
7450     VAStatus vaStatus;
7451
7452     switch (profile) {
7453     case VAProfileH264ConstrainedBaseline:
7454     case VAProfileH264Main:
7455     case VAProfileH264High:
7456     case VAProfileH264MultiviewHigh:
7457     case VAProfileH264StereoHigh:
7458         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7459         break;
7460
7461     default:
7462         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
7463         break;
7464     }
7465
7466     return vaStatus;
7467 }
7468
7469 static void
7470 gen9_avc_pak_context_destroy(void * context)
7471 {
7472     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7473     struct generic_encoder_context * generic_ctx;
7474     struct i965_avc_encoder_context * avc_ctx;
7475     int i = 0;
7476
7477     if (!pak_context)
7478         return;
7479
7480     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7481     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7482
7483     // other things
7484     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7485     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7486     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7487     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7488
7489     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7490     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7491     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7492     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7493     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7494
7495     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7496         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7497     }
7498
7499     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7500         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7501     }
7502
7503     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7504         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7505         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7506     }
7507
7508 }
7509
7510 static VAStatus
7511 gen9_avc_get_coded_status(VADriverContextP ctx,
7512                           struct intel_encoder_context *encoder_context,
7513                           struct i965_coded_buffer_segment *coded_buf_seg)
7514 {
7515     struct encoder_status *avc_encode_status;
7516
7517     if (!encoder_context || !coded_buf_seg)
7518         return VA_STATUS_ERROR_INVALID_BUFFER;
7519
7520     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7521     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7522
7523     return VA_STATUS_SUCCESS;
7524 }
7525
7526 Bool
7527 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7528 {
7529     /* VME & PAK share the same context */
7530     struct i965_driver_data *i965 = i965_driver_data(ctx);
7531     struct encoder_vme_mfc_context * vme_context = NULL;
7532     struct generic_encoder_context * generic_ctx = NULL;
7533     struct i965_avc_encoder_context * avc_ctx = NULL;
7534     struct generic_enc_codec_state * generic_state = NULL;
7535     struct avc_enc_state * avc_state = NULL;
7536     struct encoder_status_buffer_internal *status_buffer;
7537     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7538
7539     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7540     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7541     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7542     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7543     avc_state = calloc(1, sizeof(struct avc_enc_state));
7544
7545     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7546         goto allocate_structure_failed;
7547
7548     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7549     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7550     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7551     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7552     memset(avc_state, 0, sizeof(struct avc_enc_state));
7553
7554     encoder_context->vme_context = vme_context;
7555     vme_context->generic_enc_ctx = generic_ctx;
7556     vme_context->private_enc_ctx = avc_ctx;
7557     vme_context->generic_enc_state = generic_state;
7558     vme_context->private_enc_state = avc_state;
7559
7560     if (IS_SKL(i965->intel.device_info) ||
7561         IS_BXT(i965->intel.device_info)) {
7562         generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7563         generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7564     } else if (IS_KBL(i965->intel.device_info) ||
7565                IS_GLK(i965->intel.device_info)) {
7566         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7567         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7568     } else
7569         goto allocate_structure_failed;
7570
7571     /* initialize misc ? */
7572     avc_ctx->ctx = ctx;
7573     generic_ctx->use_hw_scoreboard = 1;
7574     generic_ctx->use_hw_non_stalling_scoreboard = 1;
7575
7576     /* initialize generic state */
7577
7578     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7579     generic_state->preset = INTEL_PRESET_RT_SPEED;
7580     generic_state->seq_frame_number = 0;
7581     generic_state->total_frame_number = 0;
7582     generic_state->frame_type = 0;
7583     generic_state->first_frame = 1;
7584
7585     generic_state->frame_width_in_pixel = 0;
7586     generic_state->frame_height_in_pixel = 0;
7587     generic_state->frame_width_in_mbs = 0;
7588     generic_state->frame_height_in_mbs = 0;
7589     generic_state->frame_width_4x = 0;
7590     generic_state->frame_height_4x = 0;
7591     generic_state->frame_width_16x = 0;
7592     generic_state->frame_height_16x = 0;
7593     generic_state->frame_width_32x = 0;
7594     generic_state->downscaled_width_4x_in_mb = 0;
7595     generic_state->downscaled_height_4x_in_mb = 0;
7596     generic_state->downscaled_width_16x_in_mb = 0;
7597     generic_state->downscaled_height_16x_in_mb = 0;
7598     generic_state->downscaled_width_32x_in_mb = 0;
7599     generic_state->downscaled_height_32x_in_mb = 0;
7600
7601     generic_state->hme_supported = 1;
7602     generic_state->b16xme_supported = 1;
7603     generic_state->b32xme_supported = 0;
7604     generic_state->hme_enabled = 0;
7605     generic_state->b16xme_enabled = 0;
7606     generic_state->b32xme_enabled = 0;
7607     generic_state->brc_distortion_buffer_supported = 1;
7608     generic_state->brc_constant_buffer_supported = 0;
7609
7610
7611     generic_state->frame_rate = 30;
7612     generic_state->brc_allocated = 0;
7613     generic_state->brc_inited = 0;
7614     generic_state->brc_need_reset = 0;
7615     generic_state->is_low_delay = 0;
7616     generic_state->brc_enabled = 0;//default
7617     generic_state->internal_rate_mode = 0;
7618     generic_state->curr_pak_pass = 0;
7619     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7620     generic_state->is_first_pass = 1;
7621     generic_state->is_last_pass = 0;
7622     generic_state->mb_brc_enabled = 0; // enable mb brc
7623     generic_state->brc_roi_enable = 0;
7624     generic_state->brc_dirty_roi_enable = 0;
7625     generic_state->skip_frame_enbale = 0;
7626
7627     generic_state->target_bit_rate = 0;
7628     generic_state->max_bit_rate = 0;
7629     generic_state->min_bit_rate = 0;
7630     generic_state->init_vbv_buffer_fullness_in_bit = 0;
7631     generic_state->vbv_buffer_size_in_bit = 0;
7632     generic_state->frames_per_100s = 0;
7633     generic_state->gop_size = 0;
7634     generic_state->gop_ref_distance = 0;
7635     generic_state->brc_target_size = 0;
7636     generic_state->brc_mode = 0;
7637     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7638     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7639     generic_state->brc_init_reset_buf_size_in_bits = 0;
7640     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7641     generic_state->frames_per_window_size = 0;//default
7642     generic_state->target_percentage = 0;
7643
7644     generic_state->avbr_curracy = 0;
7645     generic_state->avbr_convergence = 0;
7646
7647     generic_state->num_skip_frames = 0;
7648     generic_state->size_skip_frames = 0;
7649
7650     generic_state->num_roi = 0;
7651     generic_state->max_delta_qp = 0;
7652     generic_state->min_delta_qp = 0;
7653
7654     if (encoder_context->rate_control_mode != VA_RC_NONE &&
7655         encoder_context->rate_control_mode != VA_RC_CQP) {
7656         generic_state->brc_enabled = 1;
7657         generic_state->brc_distortion_buffer_supported = 1;
7658         generic_state->brc_constant_buffer_supported = 1;
7659         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7660     }
7661     /*avc state initialization */
7662     avc_state->mad_enable = 0;
7663     avc_state->mb_disable_skip_map_enable = 0;
7664     avc_state->sfd_enable = 1;//default
7665     avc_state->sfd_mb_enable = 1;//set it true
7666     avc_state->adaptive_search_window_enable = 1;//default
7667     avc_state->mb_qp_data_enable = 0;
7668     avc_state->intra_refresh_i_enable = 0;
7669     avc_state->min_max_qp_enable = 0;
7670     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
7671
7672     //external input
7673     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7674     avc_state->ftq_skip_threshold_lut_input_enable = 0;
7675     avc_state->ftq_override = 0;
7676
7677     avc_state->direct_bias_adjustment_enable = 0;
7678     avc_state->global_motion_bias_adjustment_enable = 0;
7679     avc_state->disable_sub_mb_partion = 0;
7680     avc_state->arbitrary_num_mbs_in_slice = 0;
7681     avc_state->adaptive_transform_decision_enable = 0;//default
7682     avc_state->skip_check_disable = 0;
7683     avc_state->tq_enable = 0;
7684     avc_state->enable_avc_ildb = 0;
7685     avc_state->mbaff_flag = 0;
7686     avc_state->enable_force_skip = 1;//default
7687     avc_state->rc_panic_enable = 1;//default
7688     avc_state->suppress_recon_enable = 1;//default
7689
7690     avc_state->ref_pic_select_list_supported = 1;
7691     avc_state->mb_brc_supported = 1;//?,default
7692     avc_state->multi_pre_enable = 1;//default
7693     avc_state->ftq_enable = 1;//default
7694     avc_state->caf_supported = 1; //default
7695     avc_state->caf_enable = 0;
7696     avc_state->caf_disable_hd = 1;//default
7697     avc_state->skip_bias_adjustment_supported = 1;//default
7698
7699     avc_state->adaptive_intra_scaling_enable = 1;//default
7700     avc_state->old_mode_cost_enable = 0;//default
7701     avc_state->multi_ref_qp_enable = 1;//default
7702     avc_state->weighted_ref_l0_enable = 1;//default
7703     avc_state->weighted_ref_l1_enable = 1;//default
7704     avc_state->weighted_prediction_supported = 0;
7705     avc_state->brc_split_enable = 0;
7706     avc_state->slice_level_report_supported = 0;
7707
7708     avc_state->fbr_bypass_enable = 1;//default
7709     avc_state->field_scaling_output_interleaved = 0;
7710     avc_state->mb_variance_output_enable = 0;
7711     avc_state->mb_pixel_average_output_enable = 0;
7712     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7713     avc_state->mbenc_curbe_set_in_brc_update = 0;
7714     avc_state->rounding_inter_enable = 1; //default
7715     avc_state->adaptive_rounding_inter_enable = 1;//default
7716
7717     avc_state->mbenc_i_frame_dist_in_use = 0;
7718     avc_state->mb_status_supported = 1; //set in intialization for gen9
7719     avc_state->mb_status_enable = 0;
7720     avc_state->mb_vproc_stats_enable = 0;
7721     avc_state->flatness_check_enable = 0;
7722     avc_state->flatness_check_supported = 1;//default
7723     avc_state->block_based_skip_enable = 0;
7724     avc_state->use_widi_mbenc_kernel = 0;
7725     avc_state->kernel_trellis_enable = 0;
7726     avc_state->generic_reserved = 0;
7727
7728     avc_state->rounding_value = 0;
7729     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7730     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7731     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7732     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7733     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7734     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7735     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7736     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7737     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7738
7739     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7740     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7741     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7742
7743     avc_state->intra_refresh_qp_threshold = 0;
7744     avc_state->trellis_flag = 0;
7745     avc_state->hme_mv_cost_scaling_factor = 0;
7746     avc_state->slice_height = 1;
7747     avc_state->slice_num = 1;
7748     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7749     avc_state->bi_weight = 0;
7750
7751     avc_state->lambda_table_enable = 0;
7752
7753
7754     if (IS_SKL(i965->intel.device_info) ||
7755         IS_BXT(i965->intel.device_info)) {
7756         avc_state->brc_const_data_surface_width = 64;
7757         avc_state->brc_const_data_surface_height = 44;
7758     } else if (IS_KBL(i965->intel.device_info) ||
7759                IS_GLK(i965->intel.device_info)) {
7760         avc_state->brc_const_data_surface_width = 64;
7761         avc_state->brc_const_data_surface_height = 53;
7762         //gen95
7763         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7764         avc_state->extended_mv_cost_range_enable = 0;
7765         avc_state->reserved_g95 = 0;
7766         avc_state->mbenc_brc_buffer_size = 128;
7767         avc_state->kernel_trellis_enable = 1;
7768         avc_state->lambda_table_enable = 1;
7769         avc_state->brc_split_enable = 1;
7770     }
7771
7772     avc_state->num_refs[0] = 0;
7773     avc_state->num_refs[1] = 0;
7774     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7775     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7776     avc_state->tq_rounding = 0;
7777     avc_state->zero_mv_threshold = 0;
7778     avc_state->slice_second_levle_batch_buffer_in_use = 0;
7779
7780     //1. seq/pic/slice
7781
7782     /* the definition of status buffer offset for Encoder */
7783
7784     status_buffer = &avc_ctx->status_buffer;
7785     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
7786
7787     status_buffer->base_offset = base_offset;
7788     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7789     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7790     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7791     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7792     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7793     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
7794
7795     status_buffer->status_buffer_size = sizeof(struct encoder_status);
7796     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7797     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7798     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7799     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7800     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
7801
7802     gen9_avc_kernel_init(ctx, encoder_context);
7803     encoder_context->vme_context = vme_context;
7804     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7805     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
7806
7807     return true;
7808
7809 allocate_structure_failed:
7810
7811     free(vme_context);
7812     free(generic_ctx);
7813     free(avc_ctx);
7814     free(generic_state);
7815     free(avc_state);
7816     return false;
7817 }
7818
7819 Bool
7820 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7821 {
7822     /* VME & PAK share the same context */
7823     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7824
7825     if (!pak_context)
7826         return false;
7827
7828     encoder_context->mfc_context = pak_context;
7829     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7830     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7831     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7832     encoder_context->get_status = gen9_avc_get_coded_status;
7833     return true;
7834 }