OSDN Git Service

android: move the libraries to /vendor
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
1 /*
2  * Copyright (c) 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
46
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
53
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1
/* First MBEnc kernel entry; the per-slice-type kernels are indexed from it. */
#define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* log2 of the alignment: 4 means 16-byte (16 == 1 << 4) */
58
/*
 * Emit a 64-bit buffer address (2 dwords) into the BCS batch.
 * When bo is non-NULL a 64-bit relocation is emitted at offset delta;
 * is_target marks the bo as written by the GPU (render domain).
 * A NULL bo emits two zero dwords so the command length stays fixed.
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                        \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/*
 * Same as OUT_BUFFER_2DW plus a third dword carrying attr — presumably the
 * memory attribute/MOCS dword of the address field; confirm against the
 * command definitions in i965_defines.h.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
76
/*
 * Flat quantizer matrix: 64 bytes, each entry 0x10 (16), packed four 8-bit
 * values per dword — presumably the default (flat) AVC scaling lists for the
 * PAK QM state; confirm against the state-command programming.
 */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
83
/*
 * Flat forward-quantizer matrix: 64 entries of 0x1000 (4096), packed two
 * 16-bit values per dword — presumably the fixed-point reciprocal form of
 * the flat matrix above (65536/16 == 4096); confirm against the FQM state.
 */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
94
/* Maps slice type to a kernel index — presumably P(0)->1, B(1)->2, I(2)->0,
 * matching the I/P/B ordering of the MBEnc kernel table; confirm. */
static const unsigned int slice_type_kernel[3] = {1, 2, 0};
96
/*
 * Default CURBE payload for the BRC init/reset kernel (24 dwords).
 * Nearly everything is zeroed here and filled in at submission time; only
 * DW11-DW15 carry non-trivial defaults — presumably QP bounds (51 looks like
 * the AVC max QP) and percentage deviation thresholds; confirm against the
 * curbe layout in gen9_avc_encoder.h.
 */
static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        0
    },

    // unsigned int 4
    {
        0
    },

    // unsigned int 5
    {
        0
    },

    // unsigned int 6
    {
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        0,
        0
    },

    // unsigned int 9
    {
        0,
        0
    },

    // unsigned int 10
    {
        0,
        0
    },

    // unsigned int 11
    {
        0,
        1
    },

    // unsigned int 12
    {
        51,
        0
    },

    // unsigned int 13
    {
        40,
        60,
        80,
        120
    },

    // unsigned int 14
    {
        35,
        60,
        80,
        120
    },

    // unsigned int 15
    {
        40,
        60,
        90,
        115
    },

    // unsigned int 16
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 17
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 18
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 19
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 20
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 21
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 22
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 23
    {
        0
    }
};
253
/*
 * Default CURBE payload for the frame-level BRC update kernel (24 dwords).
 * The non-zero defaults in DW3-DW14 are threshold/adjustment tables —
 * presumably frame-size deviation thresholds and the corresponding QP
 * delta steps (note DW13 holds signed deltas -3..0 despite the generic
 * "unsigned int" dword labels); confirm against the curbe layout header.
 */
static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
    // unsigned int 0
    {
        0
    },

    // unsigned int 1
    {
        0
    },

    // unsigned int 2
    {
        0
    },

    // unsigned int 3
    {
        10,
        50
    },

    // unsigned int 4
    {
        100,
        150
    },

    // unsigned int 5
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 6
    {
        0,
        0,
        0,
        0,
        0,
        0
    },

    // unsigned int 7
    {
        0
    },

    // unsigned int 8
    {
        1,
        1,
        3,
        2
    },

    // unsigned int 9
    {
        1,
        40,
        5,
        5
    },

    // unsigned int 10
    {
        3,
        1,
        7,
        18
    },

    // unsigned int 11
    {
        25,
        37,
        40,
        75
    },

    // unsigned int 12
    {
        97,
        103,
        125,
        160
    },

    // unsigned int 13
    {
        -3,
        -2,
        -1,
        0
    },

    // unsigned int 14
    {
        1,
        2,
        3,
        0xff
    },

    // unsigned int 15
    {
        0,
        0,
        0,
        0
    },

    // unsigned int 16
    {
        0
    },

    // unsigned int 17
    {
        0
    },

    // unsigned int 18
    {
        0
    },

    // unsigned int 19
    {
        0
    },

    // unsigned int 20
    {
        0
    },

    // unsigned int 21
    {
        0
    },

    // unsigned int 22
    {
        0
    },

    // unsigned int 23
    {
        0
    },

};
410
411 static void
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413                                 struct encode_state *encode_state,
414                                 struct intel_encoder_context *encoder_context)
415 {
416     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
418     int i;
419
420     /* brc */
421     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
422
423     generic_state->brc_need_reset = encoder_context->brc.need_reset;
424
425     if (generic_state->internal_rate_mode == VA_RC_CBR) {
426         generic_state->min_bit_rate = generic_state->max_bit_rate;
427         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
428
429         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430             generic_state->target_bit_rate = generic_state->max_bit_rate;
431             generic_state->brc_need_reset = 1;
432         }
433     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
434         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
436
437         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439             generic_state->brc_need_reset = 1;
440         }
441     }
442
443     /*  frame rate */
444     if (generic_state->internal_rate_mode != VA_RC_CQP) {
445         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
448     } else {
449         generic_state->frames_per_100s = 30 * 100;
450         generic_state->frame_rate = 30 ;
451         generic_state->frames_per_window_size = 30;
452     }
453
454     /*  HRD */
455     if (generic_state->internal_rate_mode != VA_RC_CQP) {
456         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
458     }
459
460     /* ROI */
461     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462     if (generic_state->num_roi > 0) {
463         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
465
466         for (i = 0; i < generic_state->num_roi; i++) {
467             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
468             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
469             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
470             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
472
473             generic_state->roi[i].left /= 16;
474             generic_state->roi[i].right /= 16;
475             generic_state->roi[i].top /= 16;
476             generic_state->roi[i].bottom /= 16;
477         }
478     }
479
480 }
481
482 static bool
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
484                                      int binary_size,
485                                      INTEL_GENERIC_ENC_OPERATION operation,
486                                      int krnstate_idx,
487                                      struct i965_kernel *ret_kernel)
488 {
489     typedef uint32_t BIN_PTR[4];
490
491     char *bin_start;
492     gen9_avc_encoder_kernel_header      *pkh_table;
493     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
494     int next_krnoffset;
495
496     if (!pvbinary || !ret_kernel)
497         return false;
498
499     bin_start = (char *)pvbinary;
500     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
501     pinvalid_entry = &(pkh_table->static_detection) + 1;
502     next_krnoffset = binary_size;
503
504     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505         pcurr_header = &pkh_table->ply_dscale_ply;
506     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507         pcurr_header = &pkh_table->ply_2xdscale_ply;
508     } else if (operation == INTEL_GENERIC_ENC_ME) {
509         pcurr_header = &pkh_table->me_p;
510     } else if (operation == INTEL_GENERIC_ENC_BRC) {
511         pcurr_header = &pkh_table->frame_brc_init;
512     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513         pcurr_header = &pkh_table->mbenc_quality_I;
514     } else if (operation == INTEL_GENERIC_ENC_WP) {
515         pcurr_header = &pkh_table->wp;
516     } else if (operation == INTEL_GENERIC_ENC_SFD) {
517         pcurr_header = &pkh_table->static_detection;
518     } else {
519         return false;
520     }
521
522     pcurr_header += krnstate_idx;
523     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
524
525     pnext_header = (pcurr_header + 1);
526     if (pnext_header < pinvalid_entry) {
527         next_krnoffset = pnext_header->kernel_start_pointer << 6;
528     }
529     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
530
531     return true;
532 }
533 static void
534 gen9_free_surfaces_avc(void **data)
535 {
536     struct gen9_surface_avc *avc_surface;
537
538     if (!data || !*data)
539         return;
540
541     avc_surface = *data;
542
543     if (avc_surface->scaled_4x_surface_obj) {
544         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546         avc_surface->scaled_4x_surface_obj = NULL;
547     }
548
549     if (avc_surface->scaled_16x_surface_obj) {
550         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552         avc_surface->scaled_16x_surface_obj = NULL;
553     }
554
555     if (avc_surface->scaled_32x_surface_obj) {
556         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558         avc_surface->scaled_32x_surface_obj = NULL;
559     }
560
561     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
564
565     dri_bo_unreference(avc_surface->dmv_top);
566     avc_surface->dmv_top = NULL;
567     dri_bo_unreference(avc_surface->dmv_bottom);
568     avc_surface->dmv_bottom = NULL;
569
570     free(avc_surface);
571
572     *data = NULL;
573
574     return;
575 }
576
/*
 * Lazily create the per-surface private encoder data for obj_surface:
 * the 4x/16x (and, when 32x ME is enabled, 32x) downscaled surfaces used
 * by the scaling/HME kernels, the MB code and MV data buffers, the
 * optional ref-pic-select surface and the direct-MV bos.
 * Returns VA_STATUS_SUCCESS immediately if the private data already exists.
 * On failure the partially initialized private data stays attached to
 * obj_surface (private_data/free_private_data are set up front), so it is
 * reclaimed later through gen9_free_surfaces_avc.
 */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    /* Frame dimensions in 16x16 macroblock units. */
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* Already initialized for this surface — nothing to do. */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Attach before allocating so later failures are cleaned up by the
     * surface destructor (gen9_free_surfaces_avc). */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    /* NOTE(review): the i965_CreateSurfaces return codes below are not
     * checked; failure is only detected via the NULL object lookup. */
    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* The 32x chain is only needed when 32x ME can be used. */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /*mb code and mv data for each frame*/
    /* 16 dwords of MB code per macroblock — presumably the PAK object layout;
     * confirm against the MBEnc output definition. */
    size = frame_mb_nums * 16 * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mb_code_surface,
                                               ALIGN(size, 0x1000),
                                               "mb code buffer");
    if (!allocate_flag)
        goto failed_allocation;

    /* 32 dwords of MV data per macroblock. */
    size = frame_mb_nums * 32 * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_surface->res_mv_data_surface,
                                               ALIGN(size, 0x1000),
                                               "mv data buffer");
    if (!allocate_flag)
        goto failed_allocation;

    /* ref pic list*/
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /*direct mv*/
    /* 68 bytes of direct-MV data per macroblock, top/bottom field pair. */
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
720
721 static VAStatus
722 gen9_avc_allocate_resources(VADriverContextP ctx,
723                             struct encode_state *encode_state,
724                             struct intel_encoder_context *encoder_context)
725 {
726     struct i965_driver_data *i965 = i965_driver_data(ctx);
727     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
728     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
729     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
730     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
731     unsigned int size  = 0;
732     unsigned int width  = 0;
733     unsigned int height  = 0;
734     unsigned char * data  = NULL;
735     int allocate_flag = 1;
736     int i = 0;
737
738     /*all the surface/buffer are allocated here*/
739
740     /*second level batch buffer for image state write when cqp etc*/
741     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
742     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
743     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
744                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
745                                                ALIGN(size, 0x1000),
746                                                "second levle batch (image state write) buffer");
747     if (!allocate_flag)
748         goto failed_allocation;
749
750     /* scaling related surface   */
751     if (avc_state->mb_status_supported) {
752         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
753         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023)&~0x3ff;
754         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
755                                                    &avc_ctx->res_mb_status_buffer,
756                                                    ALIGN(size, 0x1000),
757                                                    "MB statistics output buffer");
758         if (!allocate_flag)
759             goto failed_allocation;
760         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
761     }
762
763     if (avc_state->flatness_check_supported) {
764         width = generic_state->frame_width_in_mbs * 4;
765         height = generic_state->frame_height_in_mbs * 4;
766         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
767         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
768                                                       &avc_ctx->res_flatness_check_surface,
769                                                       width, height,
770                                                       ALIGN(width, 64),
771                                                       "Flatness check buffer");
772         if (!allocate_flag)
773             goto failed_allocation;
774     }
775     /* me related surface */
776     width = generic_state->downscaled_width_4x_in_mb * 8;
777     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
778     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
779     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
780                                                   &avc_ctx->s4x_memv_distortion_buffer,
781                                                   width, height,
782                                                   ALIGN(width, 64),
783                                                   "4x MEMV distortion buffer");
784     if (!allocate_flag)
785         goto failed_allocation;
786     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
787
788     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
789     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
790     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
791     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
792                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
793                                                   width, height,
794                                                   width,
795                                                   "4x MEMV min distortion brc buffer");
796     if (!allocate_flag)
797         goto failed_allocation;
798     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
799
800
801     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
802     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
803     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
804     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
805                                                   &avc_ctx->s4x_memv_data_buffer,
806                                                   width, height,
807                                                   width,
808                                                   "4x MEMV data buffer");
809     if (!allocate_flag)
810         goto failed_allocation;
811     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
812
813
814     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
815     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
816     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
817     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
818                                                   &avc_ctx->s16x_memv_data_buffer,
819                                                   width, height,
820                                                   width,
821                                                   "16x MEMV data buffer");
822     if (!allocate_flag)
823         goto failed_allocation;
824     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
825
826
827     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
828     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
829     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
830     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
831                                                   &avc_ctx->s32x_memv_data_buffer,
832                                                   width, height,
833                                                   width,
834                                                   "32x MEMV data buffer");
835     if (!allocate_flag)
836         goto failed_allocation;
837     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
838
839
840     if (!generic_state->brc_allocated) {
841         /*brc related surface */
842         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
843         size = 864;
844         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
845                                                    &avc_ctx->res_brc_history_buffer,
846                                                    ALIGN(size, 0x1000),
847                                                    "brc history buffer");
848         if (!allocate_flag)
849             goto failed_allocation;
850
851         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
852         size = 64;//44
853         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
854                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
855                                                    ALIGN(size, 0x1000),
856                                                    "brc pak statistic buffer");
857         if (!allocate_flag)
858             goto failed_allocation;
859
860         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
861         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
862         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
863                                                    &avc_ctx->res_brc_image_state_read_buffer,
864                                                    ALIGN(size, 0x1000),
865                                                    "brc image state read buffer");
866         if (!allocate_flag)
867             goto failed_allocation;
868
869         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
870         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
871         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
872                                                    &avc_ctx->res_brc_image_state_write_buffer,
873                                                    ALIGN(size, 0x1000),
874                                                    "brc image state write buffer");
875         if (!allocate_flag)
876             goto failed_allocation;
877
878         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
879         height = avc_state->brc_const_data_surface_height;
880         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
881         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
882                                                       &avc_ctx->res_brc_const_data_buffer,
883                                                       width, height,
884                                                       width,
885                                                       "brc const data buffer");
886         if (!allocate_flag)
887             goto failed_allocation;
888
889         if (generic_state->brc_distortion_buffer_supported) {
890             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
891             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
892             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
893             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
894             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
895             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
896                                                           &avc_ctx->res_brc_dist_data_surface,
897                                                           width, height,
898                                                           width,
899                                                           "brc dist data buffer");
900             if (!allocate_flag)
901                 goto failed_allocation;
902             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
903         }
904
905         if (generic_state->brc_roi_enable) {
906             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
907             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
908             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
909             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
910                                                           &avc_ctx->res_mbbrc_roi_surface,
911                                                           width, height,
912                                                           width,
913                                                           "mbbrc roi buffer");
914             if (!allocate_flag)
915                 goto failed_allocation;
916             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
917         }
918
919         /*mb qp in mb brc*/
920         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
921         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
922         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
923         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
924                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
925                                                       width, height,
926                                                       width,
927                                                       "mbbrc mb qp buffer");
928         if (!allocate_flag)
929             goto failed_allocation;
930
931         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
932         size = 16 * AVC_QP_MAX * 4;
933         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
934                                                    &avc_ctx->res_mbbrc_const_data_buffer,
935                                                    ALIGN(size, 0x1000),
936                                                    "mbbrc const data buffer");
937         if (!allocate_flag)
938             goto failed_allocation;
939
940         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
941             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
942             size = avc_state->mbenc_brc_buffer_size;
943             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
944                                                        &avc_ctx->res_mbenc_brc_buffer,
945                                                        ALIGN(size, 0x1000),
946                                                        "mbenc brc buffer");
947             if (!allocate_flag)
948                 goto failed_allocation;
949             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
950         }
951         generic_state->brc_allocated = 1;
952     }
953
954     /*mb qp external*/
955     if (avc_state->mb_qp_data_enable) {
956         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
957         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
958         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
959         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
960                                                       &avc_ctx->res_mb_qp_data_surface,
961                                                       width, height,
962                                                       width,
963                                                       "external mb qp buffer");
964         if (!allocate_flag)
965             goto failed_allocation;
966     }
967
968     /*     mbenc related surface. it share most of surface with other kernels     */
969     if (avc_state->arbitrary_num_mbs_in_slice) {
970         width = (generic_state->frame_width_in_mbs + 1) * 64;
971         height = generic_state->frame_height_in_mbs ;
972         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
973         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
974                                                       &avc_ctx->res_mbenc_slice_map_surface,
975                                                       width, height,
976                                                       width,
977                                                       "slice map buffer");
978         if (!allocate_flag)
979             goto failed_allocation;
980
981         /*generate slice map,default one slice per frame.*/
982     }
983
984     /* sfd related surface  */
985     if (avc_state->sfd_enable) {
986         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
987         size = 128;
988         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
989                                                    &avc_ctx->res_sfd_output_buffer,
990                                                    size,
991                                                    "sfd output buffer");
992         if (!allocate_flag)
993             goto failed_allocation;
994
995         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
996         size = ALIGN(52, 64);
997         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
998                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
999                                                    size,
1000                                                    "sfd P frame cost table buffer");
1001         if (!allocate_flag)
1002             goto failed_allocation;
1003         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1004         assert(data);
1005         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1006         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1007
1008         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1009         size = ALIGN(52, 64);
1010         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1011                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1012                                                    size,
1013                                                    "sfd B frame cost table buffer");
1014         if (!allocate_flag)
1015             goto failed_allocation;
1016         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1017         assert(data);
1018         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1019         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1020     }
1021
1022     /* wp related surfaces */
1023     if (avc_state->weighted_prediction_supported) {
1024         for (i = 0; i < 2 ; i++) {
1025             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1026                 continue;
1027             }
1028
1029             width = generic_state->frame_width_in_pixel;
1030             height = generic_state->frame_height_in_pixel ;
1031             i965_CreateSurfaces(ctx,
1032                                 width,
1033                                 height,
1034                                 VA_RT_FORMAT_YUV420,
1035                                 1,
1036                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1037             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1038
1039             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1040                 goto failed_allocation;
1041             }
1042
1043             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1044                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1045         }
1046         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1047         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1048         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1049         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1050     }
1051
1052     /* other   */
1053
1054     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1055     size = 4 * 1;
1056     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1057                                                &avc_ctx->res_mad_data_buffer,
1058                                                ALIGN(size, 0x1000),
1059                                                "MAD data buffer");
1060     if (!allocate_flag)
1061         goto failed_allocation;
1062
1063     return VA_STATUS_SUCCESS;
1064
1065 failed_allocation:
1066     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1067 }
1068
1069 static void
1070 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1071 {
1072     if (!vme_context)
1073         return;
1074
1075     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1076     VADriverContextP ctx = avc_ctx->ctx;
1077     int i = 0;
1078
1079     /* free all the surface/buffer here*/
1080     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1081     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1082     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1083     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1084     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1085     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1086     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1087     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1088     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1089     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1090     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1091     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1092     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1093     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1094     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1095     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1096     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1097     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1098     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1099     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1100     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1101     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1102     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1103     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1104     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1105     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1106
1107     for (i = 0; i < 2 ; i++) {
1108         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1109             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1110             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1111             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1112         }
1113     }
1114
1115 }
1116
/* Dispatch a single MEDIA_OBJECT command for one GPE kernel and submit it.
 *
 * Before the kernel runs, the media_function id is written into the encoder
 * status buffer (via MI_STORE_DATA_IMM) so status queries can identify the
 * last kernel that executed.  The whole sequence is emitted atomically into
 * the batch and flushed before returning. */
static void
gen9_avc_run_kernel_media_object(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 struct i965_gpe_context *gpe_context,
                                 int media_function,
                                 struct gpe_media_object_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    /* nothing to do without a batch buffer */
    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* record which media function is about to run in the status buffer */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    /* media pipeline setup -> MEDIA_OBJECT -> state flush -> pipeline end */
    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1156
/* Dispatch a GPE kernel over a thread-space via MEDIA_OBJECT_WALKER and
 * submit it.  Same status-buffer bookkeeping and command ordering as
 * gen9_avc_run_kernel_media_object(), but the walker generates one thread
 * per walker unit instead of a single MEDIA_OBJECT. */
static void
gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        struct i965_gpe_context *gpe_context,
                                        int media_function,
                                        struct gpe_media_object_walker_parameter *param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_status_buffer_internal *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    /* nothing to do without a batch buffer */
    if (!batch)
        return;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* record which media function is about to run in the status buffer */
    status_buffer = &(avc_ctx->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    /* media pipeline setup -> MEDIA_OBJECT_WALKER -> state flush -> end */
    gpe->pipeline_setup(ctx, gpe_context, batch);
    gpe->media_object_walker(ctx, gpe_context, batch, param);
    gpe->media_state_flush(ctx, gpe_context, batch);

    gpe->pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1197
/* Size the GPE context (curbe, sampler, interface descriptors, binding
 * table and VFE state) for an AVC encoder kernel according to the
 * per-kernel parameters in kernel_param. */
static void
gen9_init_gpe_context_avc(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct encoder_kernel_parameter *kernel_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    /* sampler state only when the kernel actually uses one */
    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;
    }

    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* binding table (4 bytes per entry) followed by the padded surface
     * states, both 64-byte aligned */
    gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* scale thread count with the EU count when known; otherwise fall back
     * to a conservative fixed value */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
    else
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* remaining URB space after curbe and interface descriptors, divided
     * into urb_entry_size-register entries, clamped to the HW range 1..127 */
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
}
1237
1238 static void
1239 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1240                              struct encoder_scoreboard_parameter *scoreboard_param)
1241 {
1242     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1243     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1244     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1245
1246     if (scoreboard_param->walkpat_flag) {
1247         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1248         gpe_context->vfe_desc5.scoreboard0.type = 1;
1249
1250         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1251         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1252
1253         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1254         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1255
1256         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1257         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1258
1259         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1260         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1261     } else {
1262         // Scoreboard 0
1263         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1264         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1265
1266         // Scoreboard 1
1267         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1268         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1269
1270         // Scoreboard 2
1271         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1272         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1273
1274         // Scoreboard 3
1275         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1276         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1277
1278         // Scoreboard 4
1279         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1280         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1281
1282         // Scoreboard 5
1283         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1284         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1285
1286         // Scoreboard 6
1287         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1288         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1289
1290         // Scoreboard 7
1291         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1292         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1293     }
1294 }
1295 /*
1296 VME pipeline related function
1297 */
1298
1299 /*
1300 scaling kernel related function
1301 */
1302 static void
1303 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1304                              struct encode_state *encode_state,
1305                              struct i965_gpe_context *gpe_context,
1306                              struct intel_encoder_context *encoder_context,
1307                              void *param)
1308 {
1309     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1310     struct scaling_param *surface_param = (struct scaling_param *)param;
1311
1312     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1313
1314     if (!curbe_cmd)
1315         return;
1316
1317     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1318
1319     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1320     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1321
1322     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1323     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1324
1325
1326     curbe_cmd->dw5.flatness_threshold = 128;
1327     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1328     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1329     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1330
1331     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1332         curbe_cmd->dw7.enable_mb_variance_output ||
1333         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1334         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1335     }
1336
1337     i965_gpe_context_unmap_curbe(gpe_context);
1338     return;
1339 }
1340
1341 static void
1342 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1343                               struct encode_state *encode_state,
1344                               struct i965_gpe_context *gpe_context,
1345                               struct intel_encoder_context *encoder_context,
1346                               void *param)
1347 {
1348     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1349     struct scaling_param *surface_param = (struct scaling_param *)param;
1350
1351     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1352
1353     if (!curbe_cmd)
1354         return;
1355
1356     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1357
1358     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1359     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1360
1361     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1362     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1363
1364     if (surface_param->enable_mb_flatness_check)
1365         curbe_cmd->dw5.flatness_threshold = 128;
1366     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1367     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1368     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1369     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1370
1371     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1372         curbe_cmd->dw6.enable_mb_variance_output ||
1373         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1374         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1375     }
1376
1377     i965_gpe_context_unmap_curbe(gpe_context);
1378     return;
1379 }
1380
1381 static void
1382 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1383                              struct encode_state *encode_state,
1384                              struct i965_gpe_context *gpe_context,
1385                              struct intel_encoder_context *encoder_context,
1386                              void *param)
1387 {
1388     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1389     struct scaling_param *surface_param = (struct scaling_param *)param;
1390
1391     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1392
1393     if (!curbe_cmd)
1394         return;
1395
1396     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1397
1398     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1399     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1400
1401     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1402     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1403
1404     i965_gpe_context_unmap_curbe(gpe_context);
1405     return;
1406 }
1407
1408 static void
1409 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1410                               struct encode_state *encode_state,
1411                               struct i965_gpe_context *gpe_context,
1412                               struct intel_encoder_context *encoder_context,
1413                               void *param)
1414 {
1415     struct scaling_param *surface_param = (struct scaling_param *)param;
1416     unsigned int surface_format;
1417     unsigned int res_size;
1418
1419     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1420         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1421     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1422         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1423     else
1424         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1425
1426     gen9_add_2d_gpe_surface(ctx, gpe_context,
1427                             surface_param->input_surface,
1428                             0, 1, surface_format,
1429                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1430
1431     gen9_add_2d_gpe_surface(ctx, gpe_context,
1432                             surface_param->output_surface,
1433                             0, 1, surface_format,
1434                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1435
1436     /*add buffer mv_proc_stat, here need change*/
1437     if (surface_param->mbv_proc_stat_enabled) {
1438         res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1439
1440         gen9_add_buffer_gpe_surface(ctx,
1441                                     gpe_context,
1442                                     surface_param->pres_mbv_proc_stat_buffer,
1443                                     0,
1444                                     res_size / 4,
1445                                     0,
1446                                     GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1447     } else if (surface_param->enable_mb_flatness_check) {
1448         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1449                                        surface_param->pres_flatness_check_surface,
1450                                        1,
1451                                        I965_SURFACEFORMAT_R8_UNORM,
1452                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1453     }
1454
1455     return;
1456 }
1457
1458 static VAStatus
1459 gen9_avc_kernel_scaling(VADriverContextP ctx,
1460                         struct encode_state *encode_state,
1461                         struct intel_encoder_context *encoder_context,
1462                         int hme_type)
1463 {
1464     struct i965_driver_data *i965 = i965_driver_data(ctx);
1465     struct i965_gpe_table *gpe = &i965->gpe_table;
1466     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1467     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1468     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1469     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1470     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1471
1472     struct i965_gpe_context *gpe_context;
1473     struct scaling_param surface_param;
1474     struct object_surface *obj_surface;
1475     struct gen9_surface_avc *avc_priv_surface;
1476     struct gpe_media_object_walker_parameter media_object_walker_param;
1477     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1478     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1479     int media_function = 0;
1480     int kernel_idx = 0;
1481
1482     obj_surface = encode_state->reconstructed_object;
1483     avc_priv_surface = obj_surface->private_data;
1484
1485     memset(&surface_param, 0, sizeof(struct scaling_param));
1486     switch (hme_type) {
1487     case INTEL_ENC_HME_4x : {
1488         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1489         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1490         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1491         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1492
1493         surface_param.input_surface = encode_state->input_yuv_object ;
1494         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1495         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1496
1497         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1498         surface_param.output_frame_width = generic_state->frame_width_4x ;
1499         surface_param.output_frame_height = generic_state->frame_height_4x ;
1500
1501         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1502         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1503         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1504
1505         surface_param.blk8x8_stat_enabled = 0 ;
1506         surface_param.use_4x_scaling  = 1 ;
1507         surface_param.use_16x_scaling = 0 ;
1508         surface_param.use_32x_scaling = 0 ;
1509         break;
1510     }
1511     case INTEL_ENC_HME_16x : {
1512         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1513         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1514         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1515         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1516
1517         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1518         surface_param.input_frame_width = generic_state->frame_width_4x ;
1519         surface_param.input_frame_height = generic_state->frame_height_4x ;
1520
1521         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1522         surface_param.output_frame_width = generic_state->frame_width_16x ;
1523         surface_param.output_frame_height = generic_state->frame_height_16x ;
1524
1525         surface_param.enable_mb_flatness_check = 0 ;
1526         surface_param.enable_mb_variance_output = 0 ;
1527         surface_param.enable_mb_pixel_average_output = 0 ;
1528
1529         surface_param.blk8x8_stat_enabled = 0 ;
1530         surface_param.use_4x_scaling  = 0 ;
1531         surface_param.use_16x_scaling = 1 ;
1532         surface_param.use_32x_scaling = 0 ;
1533
1534         break;
1535     }
1536     case INTEL_ENC_HME_32x : {
1537         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1538         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1539         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1540         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1541
1542         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1543         surface_param.input_frame_width = generic_state->frame_width_16x ;
1544         surface_param.input_frame_height = generic_state->frame_height_16x ;
1545
1546         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1547         surface_param.output_frame_width = generic_state->frame_width_32x ;
1548         surface_param.output_frame_height = generic_state->frame_height_32x ;
1549
1550         surface_param.enable_mb_flatness_check = 0 ;
1551         surface_param.enable_mb_variance_output = 0 ;
1552         surface_param.enable_mb_pixel_average_output = 0 ;
1553
1554         surface_param.blk8x8_stat_enabled = 0 ;
1555         surface_param.use_4x_scaling  = 0 ;
1556         surface_param.use_16x_scaling = 0 ;
1557         surface_param.use_32x_scaling = 1 ;
1558         break;
1559     }
1560     default :
1561         assert(0);
1562
1563     }
1564
1565     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1566
1567     gpe->context_init(ctx, gpe_context);
1568     gpe->reset_binding_table(ctx, gpe_context);
1569
1570     if (surface_param.use_32x_scaling) {
1571         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1572     } else {
1573         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1574     }
1575
1576     if (surface_param.use_32x_scaling) {
1577         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1578         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1579     } else {
1580         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1581         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1582     }
1583
1584     if (surface_param.use_4x_scaling) {
1585         if (avc_state->mb_status_supported) {
1586             surface_param.enable_mb_flatness_check = 0;
1587             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1588             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1589
1590         } else {
1591             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1592             surface_param.mbv_proc_stat_enabled = 0 ;
1593             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1594         }
1595     }
1596
1597     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1598
1599     /* setup the interface data */
1600     gpe->setup_interface_data(ctx, gpe_context);
1601
1602     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1603     if (surface_param.use_32x_scaling) {
1604         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1605         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1606     } else {
1607         /* the scaling is based on 8x8 blk level */
1608         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1609         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1610     }
1611     kernel_walker_param.no_dependency = 1;
1612
1613     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1614
1615     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1616                                             gpe_context,
1617                                             media_function,
1618                                             &media_object_walker_param);
1619
1620     return VA_STATUS_SUCCESS;
1621 }
1622
1623 /*
1624 frame/mb brc related function
1625 */
/*
 * Fill in a gen9 MFX_AVC_IMG_STATE command from the current sequence and
 * picture parameters.
 *
 * The filled-in structure is a template: gen9_avc_set_image_state() and
 * gen9_avc_set_image_state_non_brc() copy it once per PAK pass and patch
 * the per-pass fields (dw4.macroblock_stat_enable, dw5.non_first_pass_flag,
 * dw5.mb_rate_ctrl_flag) before writing it into a batch buffer resource.
 *
 * @param ctx              driver context (unused here)
 * @param encode_state     current encode state (unused here)
 * @param encoder_context  encoder context holding the VME/MFC private state
 * @param pstate           command structure to fill; zeroed first
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* dw0: command header; length is in dwords with the usual bias of 2 */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//0 == frame picture (no field coding)
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//always 0 on the non-VDEnc path
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* dw4: frame structure and bitstream format flags from SPS/PPS */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* dw5: per-pass size-check / repeat controls; callers patch
     * non_first_pass_flag and mb_rate_ctrl_flag per PAK pass */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* trellis quantization (tq_*) is only applied together with CABAC
     * (entropy_coding_mode_flag == 1) */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* dw6: fixed per-MB bit-size thresholds used with the dw5 max-bit flags */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* dw10/dw11: frame bitrate window; max is the field's saturated value */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
1716
1717 void gen9_avc_set_image_state(VADriverContextP ctx,
1718                               struct encode_state *encode_state,
1719                               struct intel_encoder_context *encoder_context,
1720                               struct i965_gpe_resource *gpe_resource)
1721 {
1722     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1723     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1724     char *pdata;
1725     int i;
1726     unsigned int * data;
1727     struct gen9_mfx_avc_img_state cmd;
1728
1729     pdata = i965_map_gpe_resource(gpe_resource);
1730
1731     if (!pdata)
1732         return;
1733
1734     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1735     for (i = 0; i < generic_state->num_pak_passes; i++) {
1736
1737         if (i == 0) {
1738             cmd.dw4.macroblock_stat_enable = 0;
1739             cmd.dw5.non_first_pass_flag = 0;
1740         } else {
1741             cmd.dw4.macroblock_stat_enable = 1;
1742             cmd.dw5.non_first_pass_flag = 1;
1743             cmd.dw5.intra_mb_ipcm_flag = 1;
1744
1745         }
1746         cmd.dw5.mb_rate_ctrl_flag = 0;
1747         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1748         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1749         *data = MI_BATCH_BUFFER_END;
1750
1751         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1752     }
1753     i965_unmap_gpe_resource(gpe_resource);
1754     return;
1755 }
1756
/*
 * Emit a single MFX_AVC_IMG_STATE command (for the current PAK pass,
 * generic_state->curr_pak_pass) into gpe_resource, terminated with
 * MI_BATCH_BUFFER_END.  This is the non-BRC (e.g. CQP) counterpart of
 * gen9_avc_set_image_state(), which writes one command per pass.
 */
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;

    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);

    if (generic_state->curr_pak_pass == 0) {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;

    } else {
        cmd.dw4.macroblock_stat_enable = 1;
        /* NOTE(review): non_first_pass_flag stays 0 here even for later
         * passes, while the BRC variant (gen9_avc_set_image_state) sets it
         * to 1 — presumably intentional for the non-BRC path, but worth
         * confirming against the hardware spec. */
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    /* copy the command and append the batch-buffer terminator */
    memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
    return;
}
1794
/*
 * Build the gen9.5 (KBL/GLK) trellis-quantization lambda/rounding LUT for
 * the current frame type.
 *
 * avc_state->lamda_value_lut holds AVC_QP_MAX rows of two dwords; each
 * dword packs an intra value in the high 16 bits and an inter value in the
 * low 16 bits.  After copying the per-slice-type base table, entries below
 * GEN95_AVC_MAX_LAMBDA carrying the sentinel values (0xfffa intra /
 * 0xffef inter) are rewritten to 0xf000 + a rounding value taken from the
 * preset tables or from application-supplied overrides.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* size is already in bytes: two dwords per QP value */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    /* patch the sentinel entries in place */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;

            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    /* intra sentinel: substitute the default intra rounding */
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                if (inter == 0xffef) {
                    /* inter sentinel: pick the slice-type specific rounding,
                     * preferring the application override when valid */
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): unlike the intra path, this rewrite runs for
                 * every inter value below GEN95_AVC_MAX_LAMBDA, not only the
                 * 0xffef sentinel, so rounding_value may be 0 or left over
                 * from an earlier iteration — confirm this matches the
                 * intended table layout. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
1872
/*
 * Populate the BRC constant-data buffer (res_brc_const_data_buffer) for the
 * current frame.  The buffer is a sequence of fixed-size sections consumed
 * by the BRC kernels, written in order:
 *   - QP adjustment / distortion threshold / max-frame threshold tables
 *   - 128 B skip-threshold table (P/B only)
 *   - 256 B ref-list QP area (32 B L0 + 32 B pad + 32 B L1 + 160 B pad)
 *   - 1664 B MV cost and mode cost table
 *   - 128 B ref cost table
 *   - 64 B intra scaling factors
 *   - (KBL/GLK only) 512 B lambda table + 64 B FTQ25 table
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* select I/P/B table variant for the cost tables below */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table (2 bytes per QP); left zeroed for I slices */
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* optionally override the odd (non-FTQ) byte of each 2-byte entry with
     * the application-supplied skip thresholds */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    size = 32 + 32 + 32 + 160;
    /* 0xff marks unused L0 (offset 0) and L1 (offset 64) slots */
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            /* NOTE(review): stores the internal ref index, not a QP —
             * original carried a "?" comment here; verify against the
             * kernel's expectation. */
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* L1 entries live at offset 64 */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//?
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* NOTE(review): data_tmp is unsigned char *, so only the low
             * byte lands at byte offset 3 with a 16-byte stride; the _old
             * variant of this function uses dword offsets instead — confirm
             * which layout the kernel expects. */
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* optional FTQ skip thresholds: bytes 24-25 and 27-31 of each
     * 32-byte per-QP cost entry */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* gen9.5 parts carry two extra tables at the end of the buffer */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2029
/*
 * Legacy (gen7.5-table) variant of gen9_avc_init_brc_const_data(): fills
 * the BRC constant-data buffer using the gen75 QP-adjustment and cost
 * tables, without the ref-list QP fill or the gen9.5 extras.
 *
 * NOTE(review): unlike the non-"old" variant, `data` is unsigned int *,
 * so every `data += size` below advances size *dwords* (4x the byte
 * offsets the section comments suggest), and the skip-threshold override
 * loop writes full dwords at a 2-dword stride.  This looks like an
 * inherited pointer-scaling bug — confirm against the kernel's expected
 * buffer layout (and whether this path is still exercised) before use.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* select I/P/B table variant for the cost tables below */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* optional application-supplied non-FTQ skip thresholds
     * (dword writes here — see the pointer-scaling note above) */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* the ref-list QP area is skipped (left zeroed) in this variant */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* optional FTQ skip thresholds (dword offsets in this variant) */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2124 static void
2125 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2126                                   struct encode_state *encode_state,
2127                                   struct i965_gpe_context *gpe_context,
2128                                   struct intel_encoder_context *encoder_context,
2129                                   void * param)
2130 {
2131     gen9_avc_brc_init_reset_curbe_data *cmd;
2132     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2133     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2134     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2135     double input_bits_per_frame = 0;
2136     double bps_ratio = 0;
2137     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2138     struct avc_param common_param;
2139
2140     cmd = i965_gpe_context_map_curbe(gpe_context);
2141
2142     if (!cmd)
2143         return;
2144
2145     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2146
2147     memset(&common_param, 0, sizeof(common_param));
2148     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2149     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2150     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2151     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2152     common_param.frames_per_100s = generic_state->frames_per_100s;
2153     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2154     common_param.target_bit_rate = generic_state->target_bit_rate;
2155
2156     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2157     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2158     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2159     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2160     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2161     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2162     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2163     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2164     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2165     cmd->dw12.no_slices = avc_state->slice_num;
2166
2167     //VUI
2168     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2169         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2170         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2171             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2172
2173         }
2174
2175     }
2176     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2177     cmd->dw7.frame_rate_d = 100;
2178     cmd->dw8.brc_flag = 0;
2179     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2180
2181
2182     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2183         //CBR
2184         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2185         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2186
2187     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2188         //VBR
2189         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2190             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2191         }
2192         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2193
2194     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2195         //AVBR
2196         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2197         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2198
2199     }
2200     //igonre icq/vcm/qvbr
2201
2202     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2203     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2204
2205     //frame bits
2206     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2207
2208     if (cmd->dw2.buf_size_in_bits == 0) {
2209         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2210     }
2211
2212     if (cmd->dw1.init_buf_full_in_bits == 0) {
2213         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2214     }
2215     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2216         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2217     }
2218     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2219         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2220     }
2221
2222     //AVBR
2223     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2224         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2225         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2226
2227     }
2228
2229     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2230     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2231
2232
2233     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2234     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2235     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2236     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2237     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2238     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2239     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2240     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2241     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2242     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2243     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2244     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2245     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2246     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2247     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2248     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2249     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2250     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2251     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2252     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2253     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2254     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2255     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2256     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2257
2258     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2259
2260     i965_gpe_context_unmap_curbe(gpe_context);
2261
2262     return;
2263 }
2264
2265 static void
2266 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2267                                      struct encode_state *encode_state,
2268                                      struct i965_gpe_context *gpe_context,
2269                                      struct intel_encoder_context *encoder_context,
2270                                      void * param_mbenc)
2271 {
2272     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2273     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2274
2275     gen9_add_buffer_gpe_surface(ctx,
2276                                 gpe_context,
2277                                 &avc_ctx->res_brc_history_buffer,
2278                                 0,
2279                                 avc_ctx->res_brc_history_buffer.size,
2280                                 0,
2281                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2282
2283     gen9_add_buffer_2d_gpe_surface(ctx,
2284                                    gpe_context,
2285                                    &avc_ctx->res_brc_dist_data_surface,
2286                                    1,
2287                                    I965_SURFACEFORMAT_R8_UNORM,
2288                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2289
2290     return;
2291 }
2292
2293 static VAStatus
2294 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2295                                struct encode_state *encode_state,
2296                                struct intel_encoder_context *encoder_context)
2297 {
2298     struct i965_driver_data *i965 = i965_driver_data(ctx);
2299     struct i965_gpe_table *gpe = &i965->gpe_table;
2300     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2301     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2302     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2303     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2304
2305     struct i965_gpe_context *gpe_context;
2306     struct gpe_media_object_parameter media_object_param;
2307     struct gpe_media_object_inline_data media_object_inline_data;
2308     int media_function = 0;
2309     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2310
2311     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2312
2313     if (generic_state->brc_inited)
2314         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2315
2316     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2317
2318     gpe->context_init(ctx, gpe_context);
2319     gpe->reset_binding_table(ctx, gpe_context);
2320
2321     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2322
2323     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2324
2325     gpe->setup_interface_data(ctx, gpe_context);
2326
2327     memset(&media_object_param, 0, sizeof(media_object_param));
2328     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2329     media_object_param.pinline_data = &media_object_inline_data;
2330     media_object_param.inline_size = sizeof(media_object_inline_data);
2331
2332     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2333                                      gpe_context,
2334                                      media_function,
2335                                      &media_object_param);
2336
2337     return VA_STATUS_SUCCESS;
2338 }
2339
/*
 * Fill the CURBE for the frame-level BRC update kernel.
 *
 * Copies the static init template, then patches per-frame state: target
 * buffer fullness (with wrap handling), skipped-frame accounting, frame
 * type, min/max QP limits and, for AVBR, the growth-adjust frame windows
 * and rate-ratio thresholds.  As a side effect it advances
 * generic_state->brc_init_current_target_buf_full_in_bits by one frame's
 * worth of bits.  Bails out silently if the reconstructed surface or the
 * CURBE mapping is unavailable.
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Start from the static defaults and override per-frame fields below. */
    memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));

    cmd->dw5.target_size_flag = 0 ;
    /* Target fullness exceeded the buffer size: wrap it and flag overflow
     * so the kernel compensates. */
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Account for frames the app skipped: report count/size to the kernel
     * and credit their budget back to the target fullness.
     * (Field name "skip_frame_enbale" is a historical typo in the struct.) */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header bytes already inserted, converted to bits ("herder" is a
     * historical typo in the state field name). */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-frame-type QP clamping; zeros mean "no explicit limits". */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Advance the running target fullness by one frame's bit budget. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR only: growth-adjust frame windows scaled by convergence, and
     * rate-ratio thresholds scaled by accuracy ("curracy" is a historical
     * typo in the state field name). */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    /* Derive the profile/level frame-size cap from the stream parameters. */
    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2449
2450 static void
2451 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2452                                        struct encode_state *encode_state,
2453                                        struct i965_gpe_context *gpe_context,
2454                                        struct intel_encoder_context *encoder_context,
2455                                        void * param_brc)
2456 {
2457     struct i965_driver_data *i965 = i965_driver_data(ctx);
2458     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2459     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2460     struct brc_param * param = (struct brc_param *)param_brc ;
2461     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2462     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2463     unsigned char is_g95 = 0;
2464
2465     if (IS_SKL(i965->intel.device_info) ||
2466         IS_BXT(i965->intel.device_info))
2467         is_g95 = 0;
2468     else if (IS_KBL(i965->intel.device_info) ||
2469              IS_GLK(i965->intel.device_info))
2470         is_g95 = 1;
2471
2472     /* brc history buffer*/
2473     gen9_add_buffer_gpe_surface(ctx,
2474                                 gpe_context,
2475                                 &avc_ctx->res_brc_history_buffer,
2476                                 0,
2477                                 avc_ctx->res_brc_history_buffer.size,
2478                                 0,
2479                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2480
2481     /* previous pak buffer*/
2482     gen9_add_buffer_gpe_surface(ctx,
2483                                 gpe_context,
2484                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2485                                 0,
2486                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2487                                 0,
2488                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2489
2490     /* image state command buffer read only*/
2491     gen9_add_buffer_gpe_surface(ctx,
2492                                 gpe_context,
2493                                 &avc_ctx->res_brc_image_state_read_buffer,
2494                                 0,
2495                                 avc_ctx->res_brc_image_state_read_buffer.size,
2496                                 0,
2497                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2498
2499     /* image state command buffer write only*/
2500     gen9_add_buffer_gpe_surface(ctx,
2501                                 gpe_context,
2502                                 &avc_ctx->res_brc_image_state_write_buffer,
2503                                 0,
2504                                 avc_ctx->res_brc_image_state_write_buffer.size,
2505                                 0,
2506                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2507
2508     if (avc_state->mbenc_brc_buffer_size > 0) {
2509         gen9_add_buffer_gpe_surface(ctx,
2510                                     gpe_context,
2511                                     &(avc_ctx->res_mbenc_brc_buffer),
2512                                     0,
2513                                     avc_ctx->res_mbenc_brc_buffer.size,
2514                                     0,
2515                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2516     } else {
2517         /*  Mbenc curbe input buffer */
2518         gen9_add_dri_buffer_gpe_surface(ctx,
2519                                         gpe_context,
2520                                         gpe_context_mbenc->dynamic_state.bo,
2521                                         0,
2522                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2523                                         gpe_context_mbenc->curbe.offset,
2524                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2525         /* Mbenc curbe output buffer */
2526         gen9_add_dri_buffer_gpe_surface(ctx,
2527                                         gpe_context,
2528                                         gpe_context_mbenc->dynamic_state.bo,
2529                                         0,
2530                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2531                                         gpe_context_mbenc->curbe.offset,
2532                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2533     }
2534
2535     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2536     gen9_add_buffer_2d_gpe_surface(ctx,
2537                                    gpe_context,
2538                                    &avc_ctx->res_brc_dist_data_surface,
2539                                    1,
2540                                    I965_SURFACEFORMAT_R8_UNORM,
2541                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2542
2543     /* BRC const data 2D surface buffer */
2544     gen9_add_buffer_2d_gpe_surface(ctx,
2545                                    gpe_context,
2546                                    &avc_ctx->res_brc_const_data_buffer,
2547                                    1,
2548                                    I965_SURFACEFORMAT_R8_UNORM,
2549                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2550
2551     /* MB statistical data surface*/
2552     gen9_add_buffer_gpe_surface(ctx,
2553                                 gpe_context,
2554                                 &avc_ctx->res_mb_status_buffer,
2555                                 0,
2556                                 avc_ctx->res_mb_status_buffer.size,
2557                                 0,
2558                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2559
2560     return;
2561 }
2562
/*
 * Run the frame-level BRC update kernel.
 *
 * First programs the mbenc CURBE for the upcoming MB-encode pass (the BRC
 * kernel rewrites that CURBE with updated QP/rate state), then sets up and
 * dispatches the BRC frame-update kernel itself, after refreshing the BRC
 * constant data and the image state command buffer it reads.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    /* brc_enabled is deliberately 0 here: the mbenc CURBE is programmed as
     * if BRC were off, and the BRC kernel patches it afterwards. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* NOTE(review): the trailing "&& (0)" hard-disables dirty-ROI for P
     * frames — presumably intentional (feature not ready); confirm before
     * enabling. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Pick the mbenc kernel variant (normal/performance/quality base,
     * offset below by frame type). */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    /* Kernel table layout: I variant, then P (+1), then B (+2). */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    /* From here on, gpe_context is repointed at the BRC frame-update
     * kernel; the mbenc context is kept in curbe_brc_param. */
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct*/
    gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
2683
2684 static void
2685 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2686                                  struct encode_state *encode_state,
2687                                  struct i965_gpe_context *gpe_context,
2688                                  struct intel_encoder_context *encoder_context,
2689                                  void * param)
2690 {
2691     gen9_avc_mb_brc_curbe_data *cmd;
2692     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2693     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2694
2695     cmd = i965_gpe_context_map_curbe(gpe_context);
2696
2697     if (!cmd)
2698         return;
2699
2700     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2701
2702     cmd->dw0.cur_frame_type = generic_state->frame_type;
2703     if (generic_state->brc_roi_enable) {
2704         cmd->dw0.enable_roi = 1;
2705     } else {
2706         cmd->dw0.enable_roi = 0;
2707     }
2708
2709     i965_gpe_context_unmap_curbe(gpe_context);
2710
2711     return;
2712 }
2713
2714 static void
2715 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2716                                     struct encode_state *encode_state,
2717                                     struct i965_gpe_context *gpe_context,
2718                                     struct intel_encoder_context *encoder_context,
2719                                     void * param_mbenc)
2720 {
2721     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2722     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2723     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2724
2725     /* brc history buffer*/
2726     gen9_add_buffer_gpe_surface(ctx,
2727                                 gpe_context,
2728                                 &avc_ctx->res_brc_history_buffer,
2729                                 0,
2730                                 avc_ctx->res_brc_history_buffer.size,
2731                                 0,
2732                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2733
2734     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2735     if (generic_state->mb_brc_enabled) {
2736         gen9_add_buffer_2d_gpe_surface(ctx,
2737                                        gpe_context,
2738                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2739                                        1,
2740                                        I965_SURFACEFORMAT_R8_UNORM,
2741                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2742
2743     }
2744
2745     /* BRC roi feature*/
2746     if (generic_state->brc_roi_enable) {
2747         gen9_add_buffer_gpe_surface(ctx,
2748                                     gpe_context,
2749                                     &avc_ctx->res_mbbrc_roi_surface,
2750                                     0,
2751                                     avc_ctx->res_mbbrc_roi_surface.size,
2752                                     0,
2753                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2754
2755     }
2756
2757     /* MB statistical data surface*/
2758     gen9_add_buffer_gpe_surface(ctx,
2759                                 gpe_context,
2760                                 &avc_ctx->res_mb_status_buffer,
2761                                 0,
2762                                 avc_ctx->res_mb_status_buffer.size,
2763                                 0,
2764                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
2765
2766     return;
2767 }
2768
2769 static VAStatus
2770 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2771                               struct encode_state *encode_state,
2772                               struct intel_encoder_context *encoder_context)
2773
2774 {
2775     struct i965_driver_data *i965 = i965_driver_data(ctx);
2776     struct i965_gpe_table *gpe = &i965->gpe_table;
2777     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2778     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2779     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2780     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2781
2782     struct i965_gpe_context *gpe_context;
2783     struct gpe_media_object_walker_parameter media_object_walker_param;
2784     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2785     int media_function = 0;
2786     int kernel_idx = 0;
2787
2788     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2789     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2790     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2791
2792     gpe->context_init(ctx, gpe_context);
2793     gpe->reset_binding_table(ctx, gpe_context);
2794
2795     /* set curbe brc mb update*/
2796     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2797
2798
2799     /* set surface brc mb update*/
2800     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2801
2802
2803     gpe->setup_interface_data(ctx, gpe_context);
2804
2805     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2806     /* the scaling is based on 8x8 blk level */
2807     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2808     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
2809     kernel_walker_param.no_dependency = 1;
2810
2811     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2812
2813     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2814                                             gpe_context,
2815                                             media_function,
2816                                             &media_object_walker_param);
2817
2818     return VA_STATUS_SUCCESS;
2819 }
2820
/*
 * mbenc kernel related functions; this includes the intra distortion kernel.
 */
2824 static int
2825 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2826 {
2827     int biweight = 32;      // default value
2828
2829     /* based on kernel HLD*/
2830     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
2831         biweight = 32;
2832     } else {
2833         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2834
2835         if (biweight != 16 && biweight != 21 &&
2836             biweight != 32 && biweight != 43 && biweight != 48) {
2837             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
2838         }
2839     }
2840
2841     return biweight;
2842 }
2843
2844 static void
2845 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2846                                struct encode_state *encode_state,
2847                                struct intel_encoder_context *encoder_context)
2848 {
2849     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2850     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2851     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2852     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
2853
2854     int max_num_references;
2855     VAPictureH264 *curr_pic;
2856     VAPictureH264 *ref_pic_l0;
2857     VAPictureH264 *ref_pic_l1;
2858     int i = 0;
2859     int tb = 0;
2860     int td = 0;
2861     int tx = 0;
2862     int tmp = 0;
2863     int poc0 = 0;
2864     int poc1 = 0;
2865
2866     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2867
2868     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2869     curr_pic = &pic_param->CurrPic;
2870     for (i = 0; i < max_num_references; i++) {
2871         ref_pic_l0 = &(slice_param->RefPicList0[i]);
2872
2873         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2874             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2875             break;
2876         ref_pic_l1 = &(slice_param->RefPicList1[0]);
2877         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2878             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2879             break;
2880
2881         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2882         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2883         CLIP(poc0, -128, 127);
2884         CLIP(poc1, -128, 127);
2885         tb = poc0;
2886         td = poc1;
2887
2888         if (td == 0) {
2889             td = 1;
2890         }
2891         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2892         tx = (16384 + tmp) / td ;
2893         tmp = (tb * tx + 32) >> 6;
2894         CLIP(tmp, -1024, 1023);
2895         avc_state->dist_scale_factor_list0[i] = tmp;
2896     }
2897     return;
2898 }
2899
2900 static unsigned int
2901 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2902                               VAEncSliceParameterBufferH264 *slice_param,
2903                               int list,
2904                               int ref_frame_idx)
2905 {
2906     struct i965_driver_data *i965 = i965_driver_data(ctx);
2907     struct object_surface *obj_surface;
2908     struct gen9_surface_avc *avc_priv_surface;
2909     VASurfaceID surface_id;
2910
2911     assert(slice_param);
2912     assert(list < 2);
2913
2914     if (list == 0) {
2915         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2916             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2917         else
2918             return 0;
2919     } else {
2920         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2921             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2922         else
2923             return 0;
2924     }
2925     obj_surface = SURFACE(surface_id);
2926     if (obj_surface && obj_surface->private_data) {
2927         avc_priv_surface = obj_surface->private_data;
2928         return avc_priv_surface->qp_value;
2929     } else {
2930         return 0;
2931     }
2932 }
2933
2934 static void
2935 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
2936                                 struct encode_state *encode_state,
2937                                 struct intel_encoder_context *encoder_context)
2938 {
2939     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2940     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2941     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2942     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2943
2944     struct i965_gpe_resource *gpe_resource = NULL;
2945     unsigned int * data = NULL;
2946     unsigned int * data_tmp = NULL;
2947     unsigned int size = 16 * 52;
2948     unsigned int table_idx = 0;
2949     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2950     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2951     int i = 0;
2952
2953     gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
2954     assert(gpe_resource);
2955     data = i965_map_gpe_resource(gpe_resource);
2956     assert(data);
2957
2958     table_idx = slice_type_kernel[generic_state->frame_type];
2959
2960     memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
2961
2962     data_tmp = data;
2963
2964     switch (generic_state->frame_type) {
2965     case SLICE_TYPE_I:
2966         for (i = 0; i < AVC_QP_MAX ; i++) {
2967             if (avc_state->old_mode_cost_enable)
2968                 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
2969             data += 16;
2970         }
2971         break;
2972     case SLICE_TYPE_P:
2973     case SLICE_TYPE_B:
2974         for (i = 0; i < AVC_QP_MAX ; i++) {
2975             if (generic_state->frame_type == SLICE_TYPE_P) {
2976                 if (avc_state->skip_bias_adjustment_enable)
2977                     *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
2978             }
2979             if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
2980                 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2981             } else if (generic_state->frame_type == SLICE_TYPE_P) {
2982                 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
2983             } else {
2984                 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
2985             }
2986
2987             if (avc_state->adaptive_intra_scaling_enable) {
2988                 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
2989             } else {
2990                 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
2991
2992             }
2993             data += 16;
2994
2995         }
2996         break;
2997     default:
2998         assert(0);
2999     }
3000
3001     data = data_tmp;
3002     for (i = 0; i < AVC_QP_MAX ; i++) {
3003         if (avc_state->ftq_skip_threshold_lut_input_enable) {
3004             *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3005                            (avc_state->ftq_skip_threshold_lut[i] << 16) |
3006                            (avc_state->ftq_skip_threshold_lut[i] << 24));
3007             *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3008                            (avc_state->ftq_skip_threshold_lut[i] << 8) |
3009                            (avc_state->ftq_skip_threshold_lut[i] << 16) |
3010                            (avc_state->ftq_skip_threshold_lut[i] << 24));
3011         }
3012
3013         if (avc_state->kernel_trellis_enable) {
3014             *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3015             *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3016
3017         }
3018         data += 16;
3019
3020     }
3021     i965_unmap_gpe_resource(gpe_resource);
3022 }
3023
3024 static void
3025 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3026                          struct encode_state *encode_state,
3027                          struct i965_gpe_context *gpe_context,
3028                          struct intel_encoder_context *encoder_context,
3029                          void * param)
3030 {
3031     struct i965_driver_data *i965 = i965_driver_data(ctx);
3032     union {
3033         gen9_avc_mbenc_curbe_data *g9;
3034         gen95_avc_mbenc_curbe_data *g95;
3035     } cmd;
3036     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3037     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3038     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3039
3040     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3041     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3042     VASurfaceID surface_id;
3043     struct object_surface *obj_surface;
3044
3045     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3046     unsigned char qp = 0;
3047     unsigned char me_method = 0;
3048     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3049     unsigned int table_idx = 0;
3050     unsigned char is_g9 = 0;
3051     unsigned char is_g95 = 0;
3052     unsigned int curbe_size = 0;
3053
3054     unsigned int preset = generic_state->preset;
3055     if (IS_SKL(i965->intel.device_info) ||
3056         IS_BXT(i965->intel.device_info)) {
3057         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3058         if (!cmd.g9)
3059             return;
3060         is_g9 = 1;
3061         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3062         memset(cmd.g9, 0, curbe_size);
3063
3064         if (mbenc_i_frame_dist_in_use) {
3065             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3066
3067         } else {
3068             switch (generic_state->frame_type) {
3069             case SLICE_TYPE_I:
3070                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3071                 break;
3072             case SLICE_TYPE_P:
3073                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3074                 break;
3075             case SLICE_TYPE_B:
3076                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3077                 break;
3078             default:
3079                 assert(0);
3080             }
3081
3082         }
3083     } else if (IS_KBL(i965->intel.device_info) ||
3084                IS_GLK(i965->intel.device_info)) {
3085         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3086         if (!cmd.g95)
3087             return;
3088         is_g95 = 1;
3089         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3090         memset(cmd.g9, 0, curbe_size);
3091
3092         if (mbenc_i_frame_dist_in_use) {
3093             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3094
3095         } else {
3096             switch (generic_state->frame_type) {
3097             case SLICE_TYPE_I:
3098                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3099                 break;
3100             case SLICE_TYPE_P:
3101                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3102                 break;
3103             case SLICE_TYPE_B:
3104                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3105                 break;
3106             default:
3107                 assert(0);
3108             }
3109
3110         }
3111     }
3112
3113     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3114     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3115
3116     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3117     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3118     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3119     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3120
3121     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3122     cmd.g9->dw38.max_len_sp = 0;
3123
3124     if (is_g95)
3125         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3126
3127     cmd.g9->dw3.src_access = 0;
3128     cmd.g9->dw3.ref_access = 0;
3129
3130     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3131         //disable ftq_override by now.
3132         if (avc_state->ftq_override) {
3133             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3134
3135         } else {
3136             // both gen9 and gen95 come here by now
3137             if (generic_state->frame_type == SLICE_TYPE_P) {
3138                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3139
3140             } else {
3141                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3142             }
3143         }
3144     } else {
3145         cmd.g9->dw3.ftq_enable = 0;
3146     }
3147
3148     if (avc_state->disable_sub_mb_partion)
3149         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3150
3151     if (mbenc_i_frame_dist_in_use) {
3152         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3153         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3154         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3155         cmd.g9->dw6.batch_buffer_end = 0;
3156         cmd.g9->dw31.intra_compute_type = 1;
3157
3158     } else {
3159         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3160         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3161         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3162
3163         {
3164             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3165             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3166                 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3167             } else if (avc_state->skip_bias_adjustment_enable) {
3168                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3169                 // No need to check for P picture as the flag is only enabled for P picture */
3170                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3171
3172             }
3173         }
3174
3175         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3176         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3177     }
3178     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3179     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3180     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3181     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3182     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3183     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3184     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3185     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3186
3187
3188     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3189     cmd.g9->dw7.src_field_polarity = 0;//field related
3190
3191     /*ftq_skip_threshold_lut set,dw14 /15*/
3192
3193     /*r5 disable NonFTQSkipThresholdLUT*/
3194     if (generic_state->frame_type == SLICE_TYPE_P) {
3195         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3196
3197     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3198         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3199
3200     }
3201
3202     cmd.g9->dw13.qp_prime_y = qp;
3203     cmd.g9->dw13.qp_prime_cb = qp;
3204     cmd.g9->dw13.qp_prime_cr = qp;
3205     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3206
3207     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3208         switch (gen9_avc_multi_pred[preset]) {
3209         case 0:
3210             cmd.g9->dw32.mult_pred_l0_disable = 128;
3211             cmd.g9->dw32.mult_pred_l1_disable = 128;
3212             break;
3213         case 1:
3214             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3215             cmd.g9->dw32.mult_pred_l1_disable = 128;
3216             break;
3217         case 2:
3218             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3219             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3220             break;
3221         case 3:
3222             cmd.g9->dw32.mult_pred_l0_disable = 1;
3223             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3224             break;
3225
3226         }
3227
3228     } else {
3229         cmd.g9->dw32.mult_pred_l0_disable = 128;
3230         cmd.g9->dw32.mult_pred_l1_disable = 128;
3231     }
3232
3233     /*field setting for dw33 34, ignored*/
3234
3235     if (avc_state->adaptive_transform_decision_enable) {
3236         if (generic_state->frame_type != SLICE_TYPE_I) {
3237             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3238             if (is_g95) {
3239                 cmd.g95->dw60.mb_texture_threshold = 1024;
3240                 cmd.g95->dw60.tx_decision_threshold = 128;
3241             }
3242
3243         }
3244
3245         if (is_g9) {
3246             cmd.g9->dw58.mb_texture_threshold = 1024;
3247             cmd.g9->dw58.tx_decision_threshold = 128;
3248         }
3249     }
3250
3251
3252     if (generic_state->frame_type == SLICE_TYPE_B) {
3253         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3254         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3255         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3256     }
3257
3258     cmd.g9->dw34.b_original_bff = 0; //frame only
3259     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3260     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3261     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3262     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3263     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3264     if (is_g95) {
3265         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3266         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3267     }
3268
3269     if (is_g9) {
3270         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3271
3272         if (cmd.g9->dw34.force_non_skip_check) {
3273             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3274         }
3275     }
3276
3277
3278     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3279     cmd.g9->dw38.ref_threshold = 400;
3280     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3281
3282     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3283        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3284        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3285     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3286
3287     if (mbenc_i_frame_dist_in_use) {
3288         cmd.g9->dw13.qp_prime_y = 0;
3289         cmd.g9->dw13.qp_prime_cb = 0;
3290         cmd.g9->dw13.qp_prime_cr = 0;
3291         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3292         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3293         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3294
3295     }
3296     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3297         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3298         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3299         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3300         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3301         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3302         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3303         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3304         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3305         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3306         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3307     }
3308
3309     table_idx = slice_type_kernel[generic_state->frame_type];
3310     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3311
3312     if (generic_state->frame_type == SLICE_TYPE_I) {
3313         cmd.g9->dw0.skip_mode_enable = 0;
3314         cmd.g9->dw37.skip_mode_enable = 0;
3315         cmd.g9->dw36.hme_combine_overlap = 0;
3316         cmd.g9->dw47.intra_cost_sf = 16;
3317         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3318         if (is_g9)
3319             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3320
3321     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3322         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3323         cmd.g9->dw3.bme_disable_fbr = 1;
3324         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3325         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3326         cmd.g9->dw7.non_skip_zmv_added = 1;
3327         cmd.g9->dw7.non_skip_mode_added = 1;
3328         cmd.g9->dw7.skip_center_mask = 1;
3329         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3330         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3331         cmd.g9->dw36.hme_combine_overlap = 1;
3332         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3333         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3334         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3335         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3336         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3337         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3338             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3339
3340     } else {
3341         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3342         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3343         cmd.g9->dw3.search_ctrl = 7;
3344         cmd.g9->dw3.skip_type = 1;
3345         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3346         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3347         cmd.g9->dw7.skip_center_mask = 0xff;
3348         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3349         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3350         cmd.g9->dw36.hme_combine_overlap = 1;
3351         surface_id = slice_param->RefPicList1[0].picture_id;
3352         obj_surface = SURFACE(surface_id);
3353         if (!obj_surface) {
3354             WARN_ONCE("Invalid backward reference frame\n");
3355             return;
3356         }
3357         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3358
3359         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3360         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3361         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3362         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3363         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3364         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3365         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3366         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3367         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3368         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3369         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3370         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3371
3372         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3373         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3374             cmd.g9->dw7.non_skip_zmv_added = 1;
3375             cmd.g9->dw7.non_skip_mode_added = 1;
3376         }
3377
3378         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3379         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3380             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3381
3382     }
3383
3384     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3385
3386     if (avc_state->rolling_intra_refresh_enable) {
3387         /*by now disable it*/
3388         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3389         cmd.g9->dw32.mult_pred_l0_disable = 128;
3390         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3391          across one P frame to another P frame, as needed by the RollingI algo */
3392         if (is_g9) {
3393             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3394             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3395             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3396         }
3397
3398         if (is_g95) {
3399             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3400                 cmd.g95->dw4.enable_intra_refresh = 0;
3401                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3402                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3403                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3404             } else {
3405                 cmd.g95->dw4.enable_intra_refresh = 1;
3406                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3407                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3408                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3409                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3410                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3411             }
3412         }
3413
3414     } else {
3415         cmd.g9->dw34.widi_intra_refresh_en = 0;
3416     }
3417
3418     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3419     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3420
3421     /*roi set disable by now. 49-56*/
3422     if (curbe_param->roi_enabled) {
3423         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3424         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3425         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3426         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3427
3428         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3429         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3430         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3431         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3432
3433         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3434         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3435         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3436         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3437
3438         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3439         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3440         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3441         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3442
3443         if (!generic_state->brc_enabled) {
3444             char tmp = 0;
3445             tmp = generic_state->roi[0].value;
3446             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3447             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3448             tmp = generic_state->roi[1].value;
3449             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3450             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3451             tmp = generic_state->roi[2].value;
3452             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3453             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3454             tmp = generic_state->roi[3].value;
3455             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3456             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3457         } else {
3458             cmd.g9->dw34.roi_enable_flag = 0;
3459         }
3460     }
3461
3462     if (is_g95) {
3463         if (avc_state->tq_enable) {
3464             if (generic_state->frame_type == SLICE_TYPE_I) {
3465                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3466                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3467
3468             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3469                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3470                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3471
3472             } else {
3473                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3474                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3475             }
3476
3477             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3478                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3479
3480             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3481                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3482
3483             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3484                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3485
3486             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3487                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3488         }
3489     }
3490
3491     if (is_g95) {
3492         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3493         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3494         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3495         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3496         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3497         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3498         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3499         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3500         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3501         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3502         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3503         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3504         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3505         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3506         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3507         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3508         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3509         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3510         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3511         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3512         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3513         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3514     }
3515
3516     if (is_g9) {
3517         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3518         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3519         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3520         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3521         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3522         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3523         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3524         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3525         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3526         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3527         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3528         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3529         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3530         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3531         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3532         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3533         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3534         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3535         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3536         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3537         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3538         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3539     }
3540
3541     i965_gpe_context_unmap_curbe(gpe_context);
3542
3543     return;
3544 }
3545
3546 static void
3547 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
3548                             struct encode_state *encode_state,
3549                             struct i965_gpe_context *gpe_context,
3550                             struct intel_encoder_context *encoder_context,
3551                             void * param_mbenc)
3552 {
3553     struct i965_driver_data *i965 = i965_driver_data(ctx);
3554     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3555     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3556     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3557     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3558     struct object_surface *obj_surface;
3559     struct gen9_surface_avc *avc_priv_surface;
3560     struct i965_gpe_resource *gpe_resource;
3561     struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
3562     VASurfaceID surface_id;
3563     unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
3564     unsigned int size = 0;
3565     unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
3566                                  generic_state->frame_height_in_mbs;
3567     int i = 0;
3568     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3569     unsigned char is_g95 = 0;
3570
3571     if (IS_SKL(i965->intel.device_info) ||
3572         IS_BXT(i965->intel.device_info))
3573         is_g95 = 0;
3574     else if (IS_KBL(i965->intel.device_info) ||
3575              IS_GLK(i965->intel.device_info))
3576         is_g95 = 1;
3577
3578     obj_surface = encode_state->reconstructed_object;
3579
3580     if (!obj_surface || !obj_surface->private_data)
3581         return;
3582     avc_priv_surface = obj_surface->private_data;
3583
3584     /*pak obj command buffer output*/
3585     size = frame_mb_size * 16 * 4;
3586     gpe_resource = &avc_priv_surface->res_mb_code_surface;
3587     gen9_add_buffer_gpe_surface(ctx,
3588                                 gpe_context,
3589                                 gpe_resource,
3590                                 0,
3591                                 size / 4,
3592                                 0,
3593                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
3594
3595     /*mv data buffer output*/
3596     size = frame_mb_size * 32 * 4;
3597     gpe_resource = &avc_priv_surface->res_mv_data_surface;
3598     gen9_add_buffer_gpe_surface(ctx,
3599                                 gpe_context,
3600                                 gpe_resource,
3601                                 0,
3602                                 size / 4,
3603                                 0,
3604                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
3605
3606     /*input current  YUV surface, current input Y/UV object*/
3607     if (mbenc_i_frame_dist_in_use) {
3608         obj_surface = encode_state->reconstructed_object;
3609         if (!obj_surface || !obj_surface->private_data)
3610             return;
3611         avc_priv_surface = obj_surface->private_data;
3612         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3613     } else {
3614         obj_surface = encode_state->input_yuv_object;
3615     }
3616     gen9_add_2d_gpe_surface(ctx,
3617                             gpe_context,
3618                             obj_surface,
3619                             0,
3620                             1,
3621                             I965_SURFACEFORMAT_R8_UNORM,
3622                             GEN9_AVC_MBENC_CURR_Y_INDEX);
3623
3624     gen9_add_2d_gpe_surface(ctx,
3625                             gpe_context,
3626                             obj_surface,
3627                             1,
3628                             1,
3629                             I965_SURFACEFORMAT_R16_UINT,
3630                             GEN9_AVC_MBENC_CURR_UV_INDEX);
3631
3632     if (generic_state->hme_enabled) {
3633         /*memv input 4x*/
3634         gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
3635         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3636                                        gpe_resource,
3637                                        1,
3638                                        I965_SURFACEFORMAT_R8_UNORM,
3639                                        GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
3640         /* memv distortion input*/
3641         gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
3642         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3643                                        gpe_resource,
3644                                        1,
3645                                        I965_SURFACEFORMAT_R8_UNORM,
3646                                        GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
3647     }
3648
3649     /*mbbrc const data_buffer*/
3650     if (param->mb_const_data_buffer_in_use) {
3651         size = 16 * AVC_QP_MAX * sizeof(unsigned int);
3652         gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
3653         gen9_add_buffer_gpe_surface(ctx,
3654                                     gpe_context,
3655                                     gpe_resource,
3656                                     0,
3657                                     size / 4,
3658                                     0,
3659                                     GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
3660
3661     }
3662
3663     /*mb qp data_buffer*/
3664     if (param->mb_qp_buffer_in_use) {
3665         if (avc_state->mb_qp_data_enable)
3666             gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
3667         else
3668             gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
3669         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3670                                        gpe_resource,
3671                                        1,
3672                                        I965_SURFACEFORMAT_R8_UNORM,
3673                                        GEN9_AVC_MBENC_MBQP_INDEX);
3674     }
3675
3676     /*input current  YUV surface, current input Y/UV object*/
3677     if (mbenc_i_frame_dist_in_use) {
3678         obj_surface = encode_state->reconstructed_object;
3679         if (!obj_surface || !obj_surface->private_data)
3680             return;
3681         avc_priv_surface = obj_surface->private_data;
3682         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3683     } else {
3684         obj_surface = encode_state->input_yuv_object;
3685     }
3686     gen9_add_adv_gpe_surface(ctx, gpe_context,
3687                              obj_surface,
3688                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
3689     /*input ref YUV surface*/
3690     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
3691         surface_id = slice_param->RefPicList0[i].picture_id;
3692         obj_surface = SURFACE(surface_id);
3693         if (!obj_surface || !obj_surface->private_data)
3694             break;
3695
3696         gen9_add_adv_gpe_surface(ctx, gpe_context,
3697                                  obj_surface,
3698                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
3699     }
3700     /*input current  YUV surface, current input Y/UV object*/
3701     if (mbenc_i_frame_dist_in_use) {
3702         obj_surface = encode_state->reconstructed_object;
3703         if (!obj_surface || !obj_surface->private_data)
3704             return;
3705         avc_priv_surface = obj_surface->private_data;
3706         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3707     } else {
3708         obj_surface = encode_state->input_yuv_object;
3709     }
3710     gen9_add_adv_gpe_surface(ctx, gpe_context,
3711                              obj_surface,
3712                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
3713
3714     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
3715         if (i > 0) break; // only  one ref supported here for B frame
3716         surface_id = slice_param->RefPicList1[i].picture_id;
3717         obj_surface = SURFACE(surface_id);
3718         if (!obj_surface || !obj_surface->private_data)
3719             break;
3720
3721         gen9_add_adv_gpe_surface(ctx, gpe_context,
3722                                  obj_surface,
3723                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
3724         gen9_add_adv_gpe_surface(ctx, gpe_context,
3725                                  obj_surface,
3726                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
3727         if (i == 0) {
3728             avc_priv_surface = obj_surface->private_data;
3729             /*pak obj command buffer output(mb code)*/
3730             size = frame_mb_size * 16 * 4;
3731             gpe_resource = &avc_priv_surface->res_mb_code_surface;
3732             gen9_add_buffer_gpe_surface(ctx,
3733                                         gpe_context,
3734                                         gpe_resource,
3735                                         0,
3736                                         size / 4,
3737                                         0,
3738                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
3739
3740             /*mv data buffer output*/
3741             size = frame_mb_size * 32 * 4;
3742             gpe_resource = &avc_priv_surface->res_mv_data_surface;
3743             gen9_add_buffer_gpe_surface(ctx,
3744                                         gpe_context,
3745                                         gpe_resource,
3746                                         0,
3747                                         size / 4,
3748                                         0,
3749                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
3750
3751         }
3752
3753         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
3754             gen9_add_adv_gpe_surface(ctx, gpe_context,
3755                                      obj_surface,
3756                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
3757         }
3758
3759     }
3760
3761     /* BRC distortion data buffer for I frame*/
3762     if (mbenc_i_frame_dist_in_use) {
3763         gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
3764         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3765                                        gpe_resource,
3766                                        1,
3767                                        I965_SURFACEFORMAT_R8_UNORM,
3768                                        GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
3769     }
3770
3771     /* as ref frame ,update later RefPicSelect of Current Picture*/
3772     obj_surface = encode_state->reconstructed_object;
3773     avc_priv_surface = obj_surface->private_data;
3774     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
3775         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
3776         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3777                                        gpe_resource,
3778                                        1,
3779                                        I965_SURFACEFORMAT_R8_UNORM,
3780                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
3781
3782     }
3783
3784     if (param->mb_vproc_stats_enable) {
3785         /*mb status buffer input*/
3786         size = frame_mb_size * 16 * 4;
3787         gpe_resource = &(avc_ctx->res_mb_status_buffer);
3788         gen9_add_buffer_gpe_surface(ctx,
3789                                     gpe_context,
3790                                     gpe_resource,
3791                                     0,
3792                                     size / 4,
3793                                     0,
3794                                     GEN9_AVC_MBENC_MB_STATS_INDEX);
3795
3796     } else if (avc_state->flatness_check_enable) {
3797
3798         gpe_resource = &(avc_ctx->res_flatness_check_surface);
3799         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3800                                        gpe_resource,
3801                                        1,
3802                                        I965_SURFACEFORMAT_R8_UNORM,
3803                                        GEN9_AVC_MBENC_MB_STATS_INDEX);
3804     }
3805
3806     if (param->mad_enable) {
3807         /*mad buffer input*/
3808         size = 4;
3809         gpe_resource = &(avc_ctx->res_mad_data_buffer);
3810         gen9_add_buffer_gpe_surface(ctx,
3811                                     gpe_context,
3812                                     gpe_resource,
3813                                     0,
3814                                     size / 4,
3815                                     0,
3816                                     GEN9_AVC_MBENC_MAD_DATA_INDEX);
3817         i965_zero_gpe_resource(gpe_resource);
3818     }
3819
3820     /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
3821     if (avc_state->mbenc_brc_buffer_size > 0) {
3822         size = avc_state->mbenc_brc_buffer_size;
3823         gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
3824         gen9_add_buffer_gpe_surface(ctx,
3825                                     gpe_context,
3826                                     gpe_resource,
3827                                     0,
3828                                     size / 4,
3829                                     0,
3830                                     GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
3831     }
3832
3833     /*artitratry num mbs in slice*/
3834     if (avc_state->arbitrary_num_mbs_in_slice) {
3835         /*slice surface input*/
3836         gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
3837         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3838                                        gpe_resource,
3839                                        1,
3840                                        I965_SURFACEFORMAT_R8_UNORM,
3841                                        GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
3842     }
3843
3844     /* BRC distortion data buffer for I frame */
3845     if (!mbenc_i_frame_dist_in_use) {
3846         if (avc_state->mb_disable_skip_map_enable) {
3847             gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
3848             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3849                                            gpe_resource,
3850                                            1,
3851                                            I965_SURFACEFORMAT_R8_UNORM,
3852                                            (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
3853         }
3854
3855         if (avc_state->sfd_enable && generic_state->hme_enabled) {
3856             if (generic_state->frame_type == SLICE_TYPE_P) {
3857                 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
3858
3859             } else if (generic_state->frame_type == SLICE_TYPE_B) {
3860                 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
3861             }
3862
3863             if (generic_state->frame_type != SLICE_TYPE_I) {
3864                 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3865                                                gpe_resource,
3866                                                1,
3867                                                I965_SURFACEFORMAT_R8_UNORM,
3868                                                (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
3869             }
3870         }
3871     }
3872
3873     return;
3874 }
3875
/*
 * Run the MBEnc GPU kernel for the current frame, or the BRC I-frame
 * distortion kernel when i_frame_dist_in_use is set.
 *
 * Selects the kernel variant from the configured kernel mode
 * (normal/performance/quality) and the slice type (I/P/B), sets up the
 * curbe (unless the BRC-update kernel already wrote it), loads MB BRC
 * constant data if needed, binds all surfaces and dispatches a media
 * object walker over the frame in macroblock units.
 *
 * Returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* NOTE: dirty-ROI is hard-disabled here via the trailing "&& (0)". */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* The MB constant-data and per-MB QP surfaces are only bound when a
     * feature that consumes them is active. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion pass: runs on the 4x-downscaled picture
         * with MAD and BRC disabled. */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* Regular MBEnc pass: pick the kernel by quality mode, then offset
         * by slice type (base = I, +1 = P, +2 = B). */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* When the BRC-update kernel has already written the MBEnc curbe, skip
     * both context init and curbe setup here to preserve its output. */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /* Walker setting: the I-frame distortion pass has no inter-MB
     * dependency; the normal pass walks 45/26 degrees depending on the
     * intra/inter prediction dependency pattern of the slice type. */
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
4041
/*
 * ME (motion estimation) kernel related functions
 */
4045 static void
4046 gen9_avc_set_curbe_me(VADriverContextP ctx,
4047                       struct encode_state *encode_state,
4048                       struct i965_gpe_context *gpe_context,
4049                       struct intel_encoder_context *encoder_context,
4050                       void * param)
4051 {
4052     gen9_avc_me_curbe_data *curbe_cmd;
4053     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4054     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4055     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4056
4057     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4058
4059     struct me_param * curbe_param = (struct me_param *)param ;
4060     unsigned char  use_mv_from_prev_step = 0;
4061     unsigned char write_distortions = 0;
4062     unsigned char qp_prime_y = 0;
4063     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4064     unsigned char seach_table_idx = 0;
4065     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4066     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4067     unsigned int scale_factor = 0;
4068
4069     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4070     switch (curbe_param->hme_type) {
4071     case INTEL_ENC_HME_4x : {
4072         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4073         write_distortions = 1;
4074         mv_shift_factor = 2;
4075         scale_factor = 4;
4076         prev_mv_read_pos_factor = 0;
4077         break;
4078     }
4079     case INTEL_ENC_HME_16x : {
4080         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4081         write_distortions = 0;
4082         mv_shift_factor = 2;
4083         scale_factor = 16;
4084         prev_mv_read_pos_factor = 1;
4085         break;
4086     }
4087     case INTEL_ENC_HME_32x : {
4088         use_mv_from_prev_step = 0;
4089         write_distortions = 0;
4090         mv_shift_factor = 1;
4091         scale_factor = 32;
4092         prev_mv_read_pos_factor = 0;
4093         break;
4094     }
4095     default:
4096         assert(0);
4097
4098     }
4099     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4100
4101     if (!curbe_cmd)
4102         return;
4103
4104     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4105     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4106
4107     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4108
4109     curbe_cmd->dw3.sub_pel_mode = 3;
4110     if (avc_state->field_scaling_output_interleaved) {
4111         /*frame set to zero,field specified*/
4112         curbe_cmd->dw3.src_access = 0;
4113         curbe_cmd->dw3.ref_access = 0;
4114         curbe_cmd->dw7.src_field_polarity = 0;
4115     }
4116     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4117     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4118     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4119
4120     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4121     curbe_cmd->dw6.write_distortions = write_distortions;
4122     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4123     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4124
4125     if (generic_state->frame_type == SLICE_TYPE_B) {
4126         curbe_cmd->dw1.bi_weight = 32;
4127         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4128         me_method = gen9_avc_b_me_method[generic_state->preset];
4129         seach_table_idx = 1;
4130     }
4131
4132     if (generic_state->frame_type == SLICE_TYPE_P ||
4133         generic_state->frame_type == SLICE_TYPE_B)
4134         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4135
4136     curbe_cmd->dw13.ref_streamin_cost = 5;
4137     curbe_cmd->dw13.roi_enable = 0;
4138
4139     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4140     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4141
4142     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
4143
4144     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4145     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4146     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4147     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4148     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4149     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4150     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4151
4152     i965_gpe_context_unmap_curbe(gpe_context);
4153     return;
4154 }
4155
4156 static void
4157 gen9_avc_send_surface_me(VADriverContextP ctx,
4158                          struct encode_state *encode_state,
4159                          struct i965_gpe_context *gpe_context,
4160                          struct intel_encoder_context *encoder_context,
4161                          void * param)
4162 {
4163     struct i965_driver_data *i965 = i965_driver_data(ctx);
4164
4165     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4166     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4167     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4168     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4169
4170     struct object_surface *obj_surface, *input_surface;
4171     struct gen9_surface_avc *avc_priv_surface;
4172     struct i965_gpe_resource *gpe_resource;
4173     struct me_param * curbe_param = (struct me_param *)param ;
4174
4175     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4176     VASurfaceID surface_id;
4177     int i = 0;
4178
4179     /* all scaled input surface stored in reconstructed_object*/
4180     obj_surface = encode_state->reconstructed_object;
4181     if (!obj_surface || !obj_surface->private_data)
4182         return;
4183     avc_priv_surface = obj_surface->private_data;
4184
4185
4186     switch (curbe_param->hme_type) {
4187     case INTEL_ENC_HME_4x : {
4188         /*memv output 4x*/
4189         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4190         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4191                                        gpe_resource,
4192                                        1,
4193                                        I965_SURFACEFORMAT_R8_UNORM,
4194                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4195
4196         /*memv input 16x*/
4197         if (generic_state->b16xme_enabled) {
4198             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4199             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4200                                            gpe_resource,
4201                                            1,
4202                                            I965_SURFACEFORMAT_R8_UNORM,
4203                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
4204         }
4205         /* brc distortion  output*/
4206         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
4207         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4208                                        gpe_resource,
4209                                        1,
4210                                        I965_SURFACEFORMAT_R8_UNORM,
4211                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
4212         /* memv distortion output*/
4213         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4214         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4215                                        gpe_resource,
4216                                        1,
4217                                        I965_SURFACEFORMAT_R8_UNORM,
4218                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
4219         /*input current down scaled YUV surface*/
4220         obj_surface = encode_state->reconstructed_object;
4221         avc_priv_surface = obj_surface->private_data;
4222         input_surface = avc_priv_surface->scaled_4x_surface_obj;
4223         gen9_add_adv_gpe_surface(ctx, gpe_context,
4224                                  input_surface,
4225                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4226         /*input ref scaled YUV surface*/
4227         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4228             surface_id = slice_param->RefPicList0[i].picture_id;
4229             obj_surface = SURFACE(surface_id);
4230             if (!obj_surface || !obj_surface->private_data)
4231                 break;
4232             avc_priv_surface = obj_surface->private_data;
4233
4234             input_surface = avc_priv_surface->scaled_4x_surface_obj;
4235
4236             gen9_add_adv_gpe_surface(ctx, gpe_context,
4237                                      input_surface,
4238                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4239         }
4240
4241         obj_surface = encode_state->reconstructed_object;
4242         avc_priv_surface = obj_surface->private_data;
4243         input_surface = avc_priv_surface->scaled_4x_surface_obj;
4244
4245         gen9_add_adv_gpe_surface(ctx, gpe_context,
4246                                  input_surface,
4247                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4248
4249         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4250             surface_id = slice_param->RefPicList1[i].picture_id;
4251             obj_surface = SURFACE(surface_id);
4252             if (!obj_surface || !obj_surface->private_data)
4253                 break;
4254             avc_priv_surface = obj_surface->private_data;
4255
4256             input_surface = avc_priv_surface->scaled_4x_surface_obj;
4257
4258             gen9_add_adv_gpe_surface(ctx, gpe_context,
4259                                      input_surface,
4260                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4261         }
4262         break;
4263
4264     }
4265     case INTEL_ENC_HME_16x : {
4266         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4267         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4268                                        gpe_resource,
4269                                        1,
4270                                        I965_SURFACEFORMAT_R8_UNORM,
4271                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4272
4273         if (generic_state->b32xme_enabled) {
4274             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4275             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4276                                            gpe_resource,
4277                                            1,
4278                                            I965_SURFACEFORMAT_R8_UNORM,
4279                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
4280         }
4281
4282         obj_surface = encode_state->reconstructed_object;
4283         avc_priv_surface = obj_surface->private_data;
4284         input_surface = avc_priv_surface->scaled_16x_surface_obj;
4285         gen9_add_adv_gpe_surface(ctx, gpe_context,
4286                                  input_surface,
4287                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4288
4289         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4290             surface_id = slice_param->RefPicList0[i].picture_id;
4291             obj_surface = SURFACE(surface_id);
4292             if (!obj_surface || !obj_surface->private_data)
4293                 break;
4294             avc_priv_surface = obj_surface->private_data;
4295
4296             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4297
4298             gen9_add_adv_gpe_surface(ctx, gpe_context,
4299                                      input_surface,
4300                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4301         }
4302
4303         obj_surface = encode_state->reconstructed_object;
4304         avc_priv_surface = obj_surface->private_data;
4305         input_surface = avc_priv_surface->scaled_16x_surface_obj;
4306
4307         gen9_add_adv_gpe_surface(ctx, gpe_context,
4308                                  input_surface,
4309                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4310
4311         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4312             surface_id = slice_param->RefPicList1[i].picture_id;
4313             obj_surface = SURFACE(surface_id);
4314             if (!obj_surface || !obj_surface->private_data)
4315                 break;
4316             avc_priv_surface = obj_surface->private_data;
4317
4318             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4319
4320             gen9_add_adv_gpe_surface(ctx, gpe_context,
4321                                      input_surface,
4322                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4323         }
4324         break;
4325     }
4326     case INTEL_ENC_HME_32x : {
4327         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4328         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4329                                        gpe_resource,
4330                                        1,
4331                                        I965_SURFACEFORMAT_R8_UNORM,
4332                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4333
4334         obj_surface = encode_state->reconstructed_object;
4335         avc_priv_surface = obj_surface->private_data;
4336         input_surface = avc_priv_surface->scaled_32x_surface_obj;
4337         gen9_add_adv_gpe_surface(ctx, gpe_context,
4338                                  input_surface,
4339                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4340
4341         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4342             surface_id = slice_param->RefPicList0[i].picture_id;
4343             obj_surface = SURFACE(surface_id);
4344             if (!obj_surface || !obj_surface->private_data)
4345                 break;
4346             avc_priv_surface = obj_surface->private_data;
4347
4348             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4349
4350             gen9_add_adv_gpe_surface(ctx, gpe_context,
4351                                      input_surface,
4352                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4353         }
4354
4355         obj_surface = encode_state->reconstructed_object;
4356         avc_priv_surface = obj_surface->private_data;
4357         input_surface = avc_priv_surface->scaled_32x_surface_obj;
4358
4359         gen9_add_adv_gpe_surface(ctx, gpe_context,
4360                                  input_surface,
4361                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4362
4363         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4364             surface_id = slice_param->RefPicList1[i].picture_id;
4365             obj_surface = SURFACE(surface_id);
4366             if (!obj_surface || !obj_surface->private_data)
4367                 break;
4368             avc_priv_surface = obj_surface->private_data;
4369
4370             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4371
4372             gen9_add_adv_gpe_surface(ctx, gpe_context,
4373                                      input_surface,
4374                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4375         }
4376         break;
4377     }
4378     default:
4379         assert(0);
4380
4381     }
4382 }
4383
4384 static VAStatus
4385 gen9_avc_kernel_me(VADriverContextP ctx,
4386                    struct encode_state *encode_state,
4387                    struct intel_encoder_context *encoder_context,
4388                    int hme_type)
4389 {
4390     struct i965_driver_data *i965 = i965_driver_data(ctx);
4391     struct i965_gpe_table *gpe = &i965->gpe_table;
4392     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4393     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4394     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4395     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4396
4397     struct i965_gpe_context *gpe_context;
4398     struct gpe_media_object_walker_parameter media_object_walker_param;
4399     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4400     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4401     int media_function = 0;
4402     int kernel_idx = 0;
4403     struct me_param param ;
4404     unsigned int scale_factor = 0;
4405
4406     switch (hme_type) {
4407     case INTEL_ENC_HME_4x : {
4408         media_function = INTEL_MEDIA_STATE_4X_ME;
4409         scale_factor = 4;
4410         break;
4411     }
4412     case INTEL_ENC_HME_16x : {
4413         media_function = INTEL_MEDIA_STATE_16X_ME;
4414         scale_factor = 16;
4415         break;
4416     }
4417     case INTEL_ENC_HME_32x : {
4418         media_function = INTEL_MEDIA_STATE_32X_ME;
4419         scale_factor = 32;
4420         break;
4421     }
4422     default:
4423         assert(0);
4424
4425     }
4426
4427     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4428     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4429
4430     /* I frame should not come here.*/
4431     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4432     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4433
4434     gpe->context_init(ctx, gpe_context);
4435     gpe->reset_binding_table(ctx, gpe_context);
4436
4437     /*set curbe*/
4438     memset(&param, 0, sizeof(param));
4439     param.hme_type = hme_type;
4440     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
4441
4442     /*send surface*/
4443     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4444
4445     gpe->setup_interface_data(ctx, gpe_context);
4446
4447     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4448     /* the scaling is based on 8x8 blk level */
4449     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4450     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4451     kernel_walker_param.no_dependency = 1;
4452
4453     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4454
4455     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4456                                             gpe_context,
4457                                             media_function,
4458                                             &media_object_walker_param);
4459
4460     return VA_STATUS_SUCCESS;
4461 }
4462
4463 /*
4464 wp related function
4465 */
4466 static void
4467 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4468                       struct encode_state *encode_state,
4469                       struct i965_gpe_context *gpe_context,
4470                       struct intel_encoder_context *encoder_context,
4471                       void * param)
4472 {
4473     gen9_avc_wp_curbe_data *cmd;
4474     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4475     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4476     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4477     struct wp_param * curbe_param = (struct wp_param *)param;
4478
4479     cmd = i965_gpe_context_map_curbe(gpe_context);
4480
4481     if (!cmd)
4482         return;
4483     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
4484     if (curbe_param->ref_list_idx) {
4485         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4486         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4487     } else {
4488         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4489         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4490     }
4491
4492     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4493     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4494
4495     i965_gpe_context_unmap_curbe(gpe_context);
4496
4497 }
4498
4499 static void
4500 gen9_avc_send_surface_wp(VADriverContextP ctx,
4501                          struct encode_state *encode_state,
4502                          struct i965_gpe_context *gpe_context,
4503                          struct intel_encoder_context *encoder_context,
4504                          void * param)
4505 {
4506     struct i965_driver_data *i965 = i965_driver_data(ctx);
4507     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4508     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4509     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4510     struct wp_param * curbe_param = (struct wp_param *)param;
4511     struct object_surface *obj_surface;
4512     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4513     VASurfaceID surface_id;
4514
4515     if (curbe_param->ref_list_idx) {
4516         surface_id = slice_param->RefPicList1[0].picture_id;
4517         obj_surface = SURFACE(surface_id);
4518         if (!obj_surface || !obj_surface->private_data)
4519             avc_state->weighted_ref_l1_enable = 0;
4520         else
4521             avc_state->weighted_ref_l1_enable = 1;
4522     } else {
4523         surface_id = slice_param->RefPicList0[0].picture_id;
4524         obj_surface = SURFACE(surface_id);
4525         if (!obj_surface || !obj_surface->private_data)
4526             avc_state->weighted_ref_l0_enable = 0;
4527         else
4528             avc_state->weighted_ref_l0_enable = 1;
4529     }
4530     if (!obj_surface)
4531         obj_surface = encode_state->reference_objects[0];
4532
4533
4534     gen9_add_adv_gpe_surface(ctx, gpe_context,
4535                              obj_surface,
4536                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
4537
4538     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4539     gen9_add_adv_gpe_surface(ctx, gpe_context,
4540                              obj_surface,
4541                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
4542 }
4543
4544
4545 static VAStatus
4546 gen9_avc_kernel_wp(VADriverContextP ctx,
4547                    struct encode_state *encode_state,
4548                    struct intel_encoder_context *encoder_context,
4549                    unsigned int list1_in_use)
4550 {
4551     struct i965_driver_data *i965 = i965_driver_data(ctx);
4552     struct i965_gpe_table *gpe = &i965->gpe_table;
4553     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4554     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4555     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4556     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4557
4558     struct i965_gpe_context *gpe_context;
4559     struct gpe_media_object_walker_parameter media_object_walker_param;
4560     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4561     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4562     struct wp_param param;
4563
4564     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4565
4566     gpe->context_init(ctx, gpe_context);
4567     gpe->reset_binding_table(ctx, gpe_context);
4568
4569     memset(&param, 0, sizeof(param));
4570     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4571     /*set curbe*/
4572     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
4573
4574     /*send surface*/
4575     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
4576
4577     gpe->setup_interface_data(ctx, gpe_context);
4578
4579     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4580     /* the scaling is based on 8x8 blk level */
4581     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4582     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4583     kernel_walker_param.no_dependency = 1;
4584
4585     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4586
4587     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4588                                             gpe_context,
4589                                             media_function,
4590                                             &media_object_walker_param);
4591
4592     return VA_STATUS_SUCCESS;
4593 }
4594
4595
4596 /*
4597 sfd related function
4598 */
4599 static void
4600 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
4601                        struct encode_state *encode_state,
4602                        struct i965_gpe_context *gpe_context,
4603                        struct intel_encoder_context *encoder_context,
4604                        void * param)
4605 {
4606     gen9_avc_sfd_curbe_data *cmd;
4607     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4608     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4609     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4610     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4611
4612     cmd = i965_gpe_context_map_curbe(gpe_context);
4613
4614     if (!cmd)
4615         return;
4616     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
4617
4618     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
4619     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
4620     cmd->dw0.stream_in_type = 7 ;
4621     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
4622     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
4623     cmd->dw0.vdenc_mode_disable = 1 ;
4624
4625     cmd->dw1.hme_stream_in_ref_cost = 5 ;
4626     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
4627     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
4628
4629     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
4630     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
4631
4632     cmd->dw3.large_mv_threshold = 128 ;
4633     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
4634     cmd->dw5.zmv_threshold = 4 ;
4635     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
4636     cmd->dw7.min_dist_threshold = 10 ;
4637
4638     if (generic_state->frame_type == SLICE_TYPE_P) {
4639         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
4640
4641     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4642         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
4643     }
4644
4645     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
4646     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
4647     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
4648     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
4649     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
4650     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
4651     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
4652
4653     i965_gpe_context_unmap_curbe(gpe_context);
4654
4655 }
4656
4657 static void
4658 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4659                           struct encode_state *encode_state,
4660                           struct i965_gpe_context *gpe_context,
4661                           struct intel_encoder_context *encoder_context,
4662                           void * param)
4663 {
4664     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4665     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4666     struct i965_gpe_resource *gpe_resource;
4667     int size = 0;
4668
4669     /*HME mv data surface memv output 4x*/
4670     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4671     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4672                                    gpe_resource,
4673                                    1,
4674                                    I965_SURFACEFORMAT_R8_UNORM,
4675                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4676
4677     /* memv distortion */
4678     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4679     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4680                                    gpe_resource,
4681                                    1,
4682                                    I965_SURFACEFORMAT_R8_UNORM,
4683                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4684     /*buffer output*/
4685     size = 32 * 4 * 4;
4686     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4687     gen9_add_buffer_gpe_surface(ctx,
4688                                 gpe_context,
4689                                 gpe_resource,
4690                                 0,
4691                                 size / 4,
4692                                 0,
4693                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4694
4695 }
4696
4697 static VAStatus
4698 gen9_avc_kernel_sfd(VADriverContextP ctx,
4699                     struct encode_state *encode_state,
4700                     struct intel_encoder_context *encoder_context)
4701 {
4702     struct i965_driver_data *i965 = i965_driver_data(ctx);
4703     struct i965_gpe_table *gpe = &i965->gpe_table;
4704     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4705     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4706     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4707
4708     struct i965_gpe_context *gpe_context;
4709     struct gpe_media_object_parameter media_object_param;
4710     struct gpe_media_object_inline_data media_object_inline_data;
4711     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4712     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4713
4714     gpe->context_init(ctx, gpe_context);
4715     gpe->reset_binding_table(ctx, gpe_context);
4716
4717     /*set curbe*/
4718     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4719
4720     /*send surface*/
4721     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4722
4723     gpe->setup_interface_data(ctx, gpe_context);
4724
4725     memset(&media_object_param, 0, sizeof(media_object_param));
4726     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4727     media_object_param.pinline_data = &media_object_inline_data;
4728     media_object_param.inline_size = sizeof(media_object_inline_data);
4729
4730     gen9_avc_run_kernel_media_object(ctx, encoder_context,
4731                                      gpe_context,
4732                                      media_function,
4733                                      &media_object_param);
4734
4735     return VA_STATUS_SUCCESS;
4736 }
4737
4738 /*
4739 kernel related function:init/destroy etc
4740 */
4741 static void
4742 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4743                              struct generic_encoder_context *generic_context,
4744                              struct gen_avc_scaling_context *kernel_context)
4745 {
4746     struct i965_driver_data *i965 = i965_driver_data(ctx);
4747     struct i965_gpe_table *gpe = &i965->gpe_table;
4748     struct i965_gpe_context *gpe_context = NULL;
4749     struct encoder_kernel_parameter kernel_param ;
4750     struct encoder_scoreboard_parameter scoreboard_param;
4751     struct i965_kernel common_kernel;
4752
4753     if (IS_SKL(i965->intel.device_info) ||
4754         IS_BXT(i965->intel.device_info)) {
4755         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4756         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4757     } else if (IS_KBL(i965->intel.device_info) ||
4758                IS_GLK(i965->intel.device_info)) {
4759         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4760         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4761     }
4762
4763     /* 4x scaling kernel*/
4764     kernel_param.sampler_size = 0;
4765
4766     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4767     scoreboard_param.mask = 0xFF;
4768     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4769     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4770     scoreboard_param.walkpat_flag = 0;
4771
4772     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4773     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4774     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4775
4776     memset(&common_kernel, 0, sizeof(common_kernel));
4777
4778     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4779                                          generic_context->enc_kernel_size,
4780                                          INTEL_GENERIC_ENC_SCALING4X,
4781                                          0,
4782                                          &common_kernel);
4783
4784     gpe->load_kernels(ctx,
4785                       gpe_context,
4786                       &common_kernel,
4787                       1);
4788
4789     /*2x scaling kernel*/
4790     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4791     kernel_param.inline_data_size = 0;
4792     kernel_param.sampler_size = 0;
4793
4794     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4795     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4796     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4797
4798     memset(&common_kernel, 0, sizeof(common_kernel));
4799
4800     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4801                                          generic_context->enc_kernel_size,
4802                                          INTEL_GENERIC_ENC_SCALING2X,
4803                                          0,
4804                                          &common_kernel);
4805
4806     gpe->load_kernels(ctx,
4807                       gpe_context,
4808                       &common_kernel,
4809                       1);
4810
4811 }
4812
4813 static void
4814 gen9_avc_kernel_init_me(VADriverContextP ctx,
4815                         struct generic_encoder_context *generic_context,
4816                         struct gen_avc_me_context *kernel_context)
4817 {
4818     struct i965_driver_data *i965 = i965_driver_data(ctx);
4819     struct i965_gpe_table *gpe = &i965->gpe_table;
4820     struct i965_gpe_context *gpe_context = NULL;
4821     struct encoder_kernel_parameter kernel_param ;
4822     struct encoder_scoreboard_parameter scoreboard_param;
4823     struct i965_kernel common_kernel;
4824     int i = 0;
4825
4826     kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4827     kernel_param.inline_data_size = 0;
4828     kernel_param.sampler_size = 0;
4829
4830     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4831     scoreboard_param.mask = 0xFF;
4832     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4833     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4834     scoreboard_param.walkpat_flag = 0;
4835
4836     for (i = 0; i < 2; i++) {
4837         gpe_context = &kernel_context->gpe_contexts[i];
4838         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4839         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4840
4841         memset(&common_kernel, 0, sizeof(common_kernel));
4842
4843         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4844                                              generic_context->enc_kernel_size,
4845                                              INTEL_GENERIC_ENC_ME,
4846                                              i,
4847                                              &common_kernel);
4848
4849         gpe->load_kernels(ctx,
4850                           gpe_context,
4851                           &common_kernel,
4852                           1);
4853     }
4854
4855 }
4856
4857 static void
4858 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4859                            struct generic_encoder_context *generic_context,
4860                            struct gen_avc_mbenc_context *kernel_context)
4861 {
4862     struct i965_driver_data *i965 = i965_driver_data(ctx);
4863     struct i965_gpe_table *gpe = &i965->gpe_table;
4864     struct i965_gpe_context *gpe_context = NULL;
4865     struct encoder_kernel_parameter kernel_param ;
4866     struct encoder_scoreboard_parameter scoreboard_param;
4867     struct i965_kernel common_kernel;
4868     int i = 0;
4869     unsigned int curbe_size = 0;
4870
4871     if (IS_SKL(i965->intel.device_info) ||
4872         IS_BXT(i965->intel.device_info)) {
4873         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4874     } else if (IS_KBL(i965->intel.device_info) ||
4875                IS_GLK(i965->intel.device_info)) {
4876         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4877     }
4878
4879     assert(curbe_size > 0);
4880     kernel_param.curbe_size = curbe_size;
4881     kernel_param.inline_data_size = 0;
4882     kernel_param.sampler_size = 0;
4883
4884     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4885     scoreboard_param.mask = 0xFF;
4886     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4887     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4888     scoreboard_param.walkpat_flag = 0;
4889
4890     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4891         gpe_context = &kernel_context->gpe_contexts[i];
4892         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4893         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4894
4895         memset(&common_kernel, 0, sizeof(common_kernel));
4896
4897         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4898                                              generic_context->enc_kernel_size,
4899                                              INTEL_GENERIC_ENC_MBENC,
4900                                              i,
4901                                              &common_kernel);
4902
4903         gpe->load_kernels(ctx,
4904                           gpe_context,
4905                           &common_kernel,
4906                           1);
4907     }
4908
4909 }
4910
4911 static void
4912 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4913                          struct generic_encoder_context *generic_context,
4914                          struct gen_avc_brc_context *kernel_context)
4915 {
4916     struct i965_driver_data *i965 = i965_driver_data(ctx);
4917     struct i965_gpe_table *gpe = &i965->gpe_table;
4918     struct i965_gpe_context *gpe_context = NULL;
4919     struct encoder_kernel_parameter kernel_param ;
4920     struct encoder_scoreboard_parameter scoreboard_param;
4921     struct i965_kernel common_kernel;
4922     int i = 0;
4923
4924     static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4925         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4926         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4927         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4928         (sizeof(gen9_avc_mbenc_curbe_data)),
4929         0,
4930         (sizeof(gen9_avc_mb_brc_curbe_data))
4931     };
4932
4933     kernel_param.inline_data_size = 0;
4934     kernel_param.sampler_size = 0;
4935
4936     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4937     scoreboard_param.mask = 0xFF;
4938     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4939     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4940     scoreboard_param.walkpat_flag = 0;
4941
4942     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4943         kernel_param.curbe_size = brc_curbe_size[i];
4944         gpe_context = &kernel_context->gpe_contexts[i];
4945         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4946         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4947
4948         memset(&common_kernel, 0, sizeof(common_kernel));
4949
4950         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4951                                              generic_context->enc_kernel_size,
4952                                              INTEL_GENERIC_ENC_BRC,
4953                                              i,
4954                                              &common_kernel);
4955
4956         gpe->load_kernels(ctx,
4957                           gpe_context,
4958                           &common_kernel,
4959                           1);
4960     }
4961
4962 }
4963
4964 static void
4965 gen9_avc_kernel_init_wp(VADriverContextP ctx,
4966                         struct generic_encoder_context *generic_context,
4967                         struct gen_avc_wp_context *kernel_context)
4968 {
4969     struct i965_driver_data *i965 = i965_driver_data(ctx);
4970     struct i965_gpe_table *gpe = &i965->gpe_table;
4971     struct i965_gpe_context *gpe_context = NULL;
4972     struct encoder_kernel_parameter kernel_param ;
4973     struct encoder_scoreboard_parameter scoreboard_param;
4974     struct i965_kernel common_kernel;
4975
4976     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
4977     kernel_param.inline_data_size = 0;
4978     kernel_param.sampler_size = 0;
4979
4980     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4981     scoreboard_param.mask = 0xFF;
4982     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4983     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4984     scoreboard_param.walkpat_flag = 0;
4985
4986     gpe_context = &kernel_context->gpe_contexts;
4987     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4988     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4989
4990     memset(&common_kernel, 0, sizeof(common_kernel));
4991
4992     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4993                                          generic_context->enc_kernel_size,
4994                                          INTEL_GENERIC_ENC_WP,
4995                                          0,
4996                                          &common_kernel);
4997
4998     gpe->load_kernels(ctx,
4999                       gpe_context,
5000                       &common_kernel,
5001                       1);
5002
5003 }
5004
5005 static void
5006 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5007                          struct generic_encoder_context *generic_context,
5008                          struct gen_avc_sfd_context *kernel_context)
5009 {
5010     struct i965_driver_data *i965 = i965_driver_data(ctx);
5011     struct i965_gpe_table *gpe = &i965->gpe_table;
5012     struct i965_gpe_context *gpe_context = NULL;
5013     struct encoder_kernel_parameter kernel_param ;
5014     struct encoder_scoreboard_parameter scoreboard_param;
5015     struct i965_kernel common_kernel;
5016
5017     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5018     kernel_param.inline_data_size = 0;
5019     kernel_param.sampler_size = 0;
5020
5021     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5022     scoreboard_param.mask = 0xFF;
5023     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5024     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5025     scoreboard_param.walkpat_flag = 0;
5026
5027     gpe_context = &kernel_context->gpe_contexts;
5028     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5029     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5030
5031     memset(&common_kernel, 0, sizeof(common_kernel));
5032
5033     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5034                                          generic_context->enc_kernel_size,
5035                                          INTEL_GENERIC_ENC_SFD,
5036                                          0,
5037                                          &common_kernel);
5038
5039     gpe->load_kernels(ctx,
5040                       gpe_context,
5041                       &common_kernel,
5042                       1);
5043
5044 }
5045
5046 static void
5047 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5048 {
5049
5050     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5051     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5052     struct i965_gpe_table *gpe = &i965->gpe_table;
5053
5054     int i = 0;
5055
5056     gen9_avc_free_resources(vme_context);
5057
5058     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5059         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5060
5061     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5062         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5063
5064     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5065         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5066
5067     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5068         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
5069
5070     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5071
5072     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
5073
5074 }
5075
5076 /*
5077 vme pipeline
5078 */
/* Refresh per-frame encoder state from the application-supplied VA-API
 * seq/pic/slice parameters: derive frame type, BRC inputs, GOP layout,
 * frame/downscaled dimensions and the HME enable flags. */
static void
gen9_avc_update_parameters(VADriverContextP ctx,
                           VAProfile profile,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncSequenceParameterBufferH264 *seq_param;
    VAEncSliceParameterBufferH264 * slice_param;
    int i, j;
    unsigned int preset = generic_state->preset;

    /* seq/pic/slice parameter setting */
    generic_state->b16xme_supported = gen9_avc_super_hme[preset];
    generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];

    avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;


    /* Walk every slice parameter buffer: cache slice pointers, count slices,
     * and enable in-loop deblocking if any slice does not disable it.
     * NOTE(review): the outer loop stops as soon as enable_avc_ildb flips to
     * 1, so slice_param[]/slice_num only cover buffers seen up to that point
     * -- confirm this is intended for multi-buffer submissions. */
    avc_state->enable_avc_ildb = 0;
    avc_state->slice_num = 0;
    for (j = 0; j < encode_state->num_slice_params_ext && avc_state->enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            /* disable_deblocking_filter_idc == 1 means "deblocking off". */
            if (slice_param->disable_deblocking_filter_idc != 1) {
                avc_state->enable_avc_ildb = 1;
            }

            avc_state->slice_param[i] = slice_param;
            slice_param++;
            avc_state->slice_num++;
        }
    }

    /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
    seq_param = avc_state->seq_param;
    slice_param = avc_state->slice_param[0];

    /* Frame type is taken from the first slice; SI folds into I. */
    generic_state->frame_type = avc_state->slice_param[0]->slice_type;

    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        generic_state->frame_type = SLICE_TYPE_I;
    else if (slice_param->slice_type == SLICE_TYPE_P)
        generic_state->frame_type = SLICE_TYPE_P;
    else if (slice_param->slice_type == SLICE_TYPE_B)
        generic_state->frame_type = SLICE_TYPE_B;
    /* Both branches force 0: 8x8 transform is disabled for every profile
     * (the comment below documents the High-profile workaround). */
    if (profile == VAProfileH264High)
        avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
    else
        avc_state->transform_8x8_mode_enable = 0;

    /* rc init*/
    /* NOTE(review): brc_enabled is consulted here but recomputed further
     * down from internal_rate_mode -- on the first frame this uses the
     * previous frame's value; confirm that ordering is intended. */
    if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        /* Bitrate in kbps; VBV fullness = 1s worth of bits, size = 2s. */
        generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
        generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
        generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
        generic_state->frames_per_100s = 3000; /* 30fps */
    }

    generic_state->gop_size = seq_param->intra_period;
    generic_state->gop_ref_distance = seq_param->ip_period;

    /* CBR pins min == max == target. */
    if (generic_state->internal_rate_mode == VA_RC_CBR) {
        generic_state->max_bit_rate = generic_state->target_bit_rate;
        generic_state->min_bit_rate = generic_state->target_bit_rate;
    }

    if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
        gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
    }

    /* Preset/quality level selects the kernel mode lookup below. */
    generic_state->preset = encoder_context->quality_level;
    if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
        generic_state->preset = INTEL_PRESET_RT_SPEED;
    }
    generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];

    /* One-time BRC bookkeeping derived from the rate parameters above. */
    if (!generic_state->brc_inited) {
        generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
        generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
        generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
        generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
    }


    generic_state->curr_pak_pass = 0;
    generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;

    /* BRC is active only for CBR/VBR; CQP and others run a single pass. */
    if (generic_state->internal_rate_mode == VA_RC_CBR ||
        generic_state->internal_rate_mode == VA_RC_VBR)
        generic_state->brc_enabled = 1;
    else
        generic_state->brc_enabled = 0;

    /* BRC needs a complete set of rate parameters; otherwise fall back. */
    if (generic_state->brc_enabled &&
        (!generic_state->init_vbv_buffer_fullness_in_bit ||
         !generic_state->vbv_buffer_size_in_bit ||
         !generic_state->max_bit_rate ||
         !generic_state->target_bit_rate ||
         !generic_state->frames_per_100s)) {
        WARN_ONCE("Rate control parameter is required for BRC\n");
        generic_state->brc_enabled = 0;
    }

    if (!generic_state->brc_enabled) {
        generic_state->target_bit_rate = 0;
        generic_state->max_bit_rate = 0;
        generic_state->min_bit_rate = 0;
        generic_state->init_vbv_buffer_fullness_in_bit = 0;
        generic_state->vbv_buffer_size_in_bit = 0;
        generic_state->num_pak_passes = 1;
    } else {
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }


    /* Frame geometry in macroblocks and pixels (16 pixels per MB). */
    generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
    generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
    generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
    generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;

    /* Downscaled surface sizes for HME: 4x, 16x and 32x, each aligned to 16. */
    generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
    generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
    generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
    generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;

    generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
    generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
    generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
    generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;

    generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
    generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
    generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
    generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;

    if (generic_state->hme_supported) {
        generic_state->hme_enabled = 1;
    } else {
        generic_state->hme_enabled = 0;
    }

    if (generic_state->b16xme_supported) {
        generic_state->b16xme_enabled = 1;
    } else {
        generic_state->b16xme_enabled = 0;
    }

    if (generic_state->b32xme_supported) {
        generic_state->b32xme_enabled = 1;
    } else {
        generic_state->b32xme_enabled = 0;
    }
    /* disable HME/16xME if the size is too small */
    /* NOTE(review): WIDTH_IN_MACROBLOCKS is also applied to heights below;
     * presumably it is a generic pixels-to-MB conversion -- confirm. */
    if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->b32xme_supported = 0;
        generic_state->b32xme_enabled = 0;
        generic_state->b16xme_supported = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }
    if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->b32xme_supported = 0;
        generic_state->b32xme_enabled = 0;
        generic_state->b16xme_supported = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }

    if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->b32xme_supported = 0;
        generic_state->b32xme_enabled = 0;
        generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }
    if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->b32xme_supported = 0;
        generic_state->b32xme_enabled = 0;
        generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }

    if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }
    if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
        generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
        generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
    }

}
5285
/* Validate and finalize per-frame encoding decisions: rate-control mode,
 * BRC/ROI flags, CAF and flatness checks, slice layout, HME gating by frame
 * type, skip-bias adjustment and inter rounding values.
 * Always returns VA_STATUS_SUCCESS. */
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    int i = 0;
    /* Effective QP of the first slice, used for the adaptive rounding tables. */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /*avbr init*/
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Map the VA rate-control bits to the internal mode; anything that is
     * not CBR/VBR falls back to CQP. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    /* Any real rate-control mode turns BRC on with its support buffers. */
    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /*check brc parameter*/
    /* Per-MB QP input conflicts with BRC-driven QP, so it is disabled. */
    if (generic_state->brc_enabled) {
        avc_state->mb_qp_data_enable = 0;
    }

    /*set the brc init and reset accordingly*/
    if (generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
         rate_control_mode == VA_RC_CQP)) {
        generic_state->brc_need_reset = 0;// not support by CQP
    }

    /* NOTE(review): this gates sfd_enable on brc_need_reset, which was
     * possibly just cleared above -- confirm the condition is intended
     * rather than a plain !sfd_mb_enable check. */
    if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
        avc_state->sfd_enable = 0;
    }

    /* Clamp the sliding-window size to at most one second of frames,
     * capped at 60. */
    if (generic_state->frames_per_window_size == 0) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    }

    if (generic_state->brc_enabled) {
        /* HME only makes sense for inter frames. */
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        if (avc_state->min_max_qp_enable) {
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    } else {
        generic_state->num_pak_passes = 1;// CQP only one pass
    }

    /* I-frame distortion pass runs only with BRC + distortion buffer. */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /*CAD check*/
    /* Check-all-fractional (CAF) enable per frame type, from preset tables. */
    if (avc_state->caf_supported) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            break;
        case SLICE_TYPE_P:
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        /* Optionally disable CAF for HD (>= 1280x720) resolutions. */
        if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
            if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if (avc_state->flatness_check_supported) {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    } else {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enbale*/
    if (avc_state->adaptive_transform_decision_enable) {
        avc_state->mb_status_enable = 1;
    } else {
        avc_state->mb_status_enable = 0;
    }
    /*slice check,all the slices use the same slice height except the last slice*/
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for (i = 0; i < avc_state->slice_num; i++) {
        /* Each slice must cover whole MB rows; slice_height keeps the last
         * computed value (per-slice map support is a TODO below). */
        assert(avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs == 0);
        avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        /*add it later for muli slices map*/
    }

    /* No motion search of any kind on intra frames. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        generic_state->hme_enabled = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->b32xme_enabled = 0;
    }

    /* B frames need the distance scale factors and the bi-weight derived
     * from list0's first entry. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
                                             && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality preset turns on trellis quantization (and MB BRC under BRC). */
    if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if (generic_state->brc_enabled) {
            generic_state->mb_brc_enabled = 1;
        }
    }

    //check the inter rounding
    /* NOTE(review): the three rounding_inter_* fields are forced to 255
     * here and immediately compared against AVC_INVALID_ROUNDING_VALUE
     * below; the adaptive paths are reachable only if that macro equals
     * 255 -- confirm the macro's value. */
    avc_state->rounding_value = 0;
    avc_state->rounding_inter_p = 255;//default
    avc_state->rounding_inter_b = 255; //default
    avc_state->rounding_inter_b_ref = 255; //default

    if (generic_state->frame_type == SLICE_TYPE_P) {
        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
            /* Adaptive rounding only applies in CQP; pick the table based on
             * whether the GOP contains B frames. */
            if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
                if (generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            } else {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        } else {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        /* Reference B frames use their own rounding table. */
        if (pic_param->pic_fields.bits.reference_pic_flag) {
            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        } else {
            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
                if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            } else {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }
    return VA_STATUS_SUCCESS;
}
5469
/*
 * Bind every VA object needed by the VME kernels to driver GPE
 * resources for the current frame: the reconstructed surface and its
 * direct-MV buffers, the raw input surface, all active reference
 * surfaces, the coded (bitstream) buffer plus the PAK status buffer,
 * and the L0/L1 reference index mapping consumed by ME/MbEnc.
 *
 * Returns VA_STATUS_SUCCESS, or the first failing sub-call's status,
 * or VA_STATUS_ERROR_INVALID_VALUE for out-of-range reference counts.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame: make sure it is backed by an NV12 bo */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* Allocate the per-surface private data (DMV buffers etc.) if this
     * surface has not been through the encoder before. */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface,frame_store_id,qp_value*/
        /* The last two DMV slots are reserved for the current frame
         * (top/bottom field); rebind them to this surface's buffers. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: slot i owns DMV slots 2*i (top) and 2*i+1
     * (bottom).  The list is assumed densely packed, so the loop stops
     * at the first empty entry. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream ?*/
    /* PAK output starts after the driver's coded-buffer header; the
     * end offset leaves the final 0x1000-aligned chunk unused. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer shares the same bo as the coded data */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    /* NOTE(review): dri_bo_map()'s return value is unchecked; a failed
     * map would leave bo->virtual invalid — confirm this is acceptable. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Derive active reference counts from the picture parameters,
     * optionally overridden at slice level. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Translate RefPicList0 picture ids into indices into
     * encode_state->reference_objects; unused slots stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same translation for RefPicList1 (B frames only). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
5666
5667 static VAStatus
5668 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5669                              struct encode_state *encode_state,
5670                              struct intel_encoder_context *encoder_context)
5671 {
5672     return VA_STATUS_SUCCESS;
5673 }
5674
5675 static VAStatus
5676 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5677                               struct encode_state *encode_state,
5678                               struct intel_encoder_context *encoder_context)
5679 {
5680
5681     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5682     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5683     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5684
5685     /*set this flag when all kernel is finished*/
5686     if (generic_state->brc_enabled) {
5687         generic_state->brc_inited = 1;
5688         generic_state->brc_need_reset = 0;
5689         avc_state->mbenc_curbe_set_in_brc_update = 0;
5690     }
5691     return VA_STATUS_SUCCESS;
5692 }
5693
/*
 * Dispatch all VME kernels for the current frame in their required
 * order: BRC init/reset, down-scaling, HME (32x -> 16x -> 4x), SFD,
 * BRC frame/MB update (plus the I-frame-distortion MbEnc pass),
 * optional weighted-prediction kernels, and the main MbEnc kernel.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling: each level requires the previous one as input */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel: runs coarsest-to-finest so each level can seed the next */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* I-frame-distortion pass feeds the BRC frame update below */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction,disable by now */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            /* NOTE(review): this fallback condition mixes l0 and l1
             * weight flags (luma_l0/chroma_l0/chroma_l1) — looks like
             * a possible copy-paste slip; verify the intended flags. */
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
5784
5785 static VAStatus
5786 gen9_avc_vme_pipeline(VADriverContextP ctx,
5787                       VAProfile profile,
5788                       struct encode_state *encode_state,
5789                       struct intel_encoder_context *encoder_context)
5790 {
5791     VAStatus va_status;
5792
5793     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5794
5795     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5796     if (va_status != VA_STATUS_SUCCESS)
5797         return va_status;
5798
5799     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5800     if (va_status != VA_STATUS_SUCCESS)
5801         return va_status;
5802
5803     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5804     if (va_status != VA_STATUS_SUCCESS)
5805         return va_status;
5806
5807     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5808     if (va_status != VA_STATUS_SUCCESS)
5809         return va_status;
5810
5811     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5812     if (va_status != VA_STATUS_SUCCESS)
5813         return va_status;
5814
5815     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5816
5817     return VA_STATUS_SUCCESS;
5818 }
5819
5820 static void
5821 gen9_avc_vme_context_destroy(void * context)
5822 {
5823     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5824     struct generic_encoder_context *generic_ctx;
5825     struct i965_avc_encoder_context *avc_ctx;
5826     struct generic_enc_codec_state *generic_state;
5827     struct avc_enc_state *avc_state;
5828
5829     if (!vme_context)
5830         return;
5831
5832     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5833     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5834     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5835     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5836
5837     gen9_avc_kernel_destroy(vme_context);
5838
5839     free(generic_ctx);
5840     free(avc_ctx);
5841     free(generic_state);
5842     free(avc_state);
5843     free(vme_context);
5844     return;
5845
5846 }
5847
5848 static void
5849 gen9_avc_kernel_init(VADriverContextP ctx,
5850                      struct intel_encoder_context *encoder_context)
5851 {
5852     struct i965_driver_data *i965 = i965_driver_data(ctx);
5853     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5854     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5855     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5856
5857     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
5858     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
5859     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
5860     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
5861     gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
5862     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
5863
5864     //function pointer
5865     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5866     generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5867     generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5868     generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5869     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5870     generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5871     generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5872     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5873     generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
5874
5875     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5876     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5877     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5878     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5879     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5880     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5881     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5882     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
5883
5884     if (IS_SKL(i965->intel.device_info) ||
5885         IS_BXT(i965->intel.device_info))
5886         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5887     else if (IS_KBL(i965->intel.device_info) ||
5888              IS_GLK(i965->intel.device_info))
5889         generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5890
5891 }
5892
5893 /*
5894 PAK pipeline related function
5895 */
5896 extern int
5897 intel_avc_enc_slice_type_fixup(int slice_type);
5898
/*
 * Emit MFX_PIPE_MODE_SELECT, configuring the MFX engine for AVC
 * encoding in long (stream-out capable) format.  MB status stream-out
 * is enabled on every PAK pass except the last, for multi-pass BRC.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
5939
/*
 * Emit MFX_SURFACE_STATE for one NV12 surface (source, reconstructed
 * or reference), identified by the hardware surface id.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr): same as cb for interleaved NV12 */

    ADVANCE_BCS_BATCH(batch);
}
5971
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs): pre/post deblocking outputs,
 * the uncompressed source picture, the PAK MB status buffer (write and
 * read), the row-store scratch buffers and the 16 reference pictures.
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read): same bo as the write
     * side above, so later passes can read back MB status. */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6029
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: points the PAK at the MbEnc MV
 * data (indirect MV object) and at the output coded buffer (PAK-BSE),
 * each bounded by an explicit upper address.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    /* The MV data lives in the reconstructed surface's private data. */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* 32 * 4 bytes of MV data per MB — presumably matching the MbEnc
     * MV output record; confirm against the MbEnc kernel layout. */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
6094
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store scratch
 * buffer is needed for encoding; the MPR row-store and bitplane read
 * buffers are ignored by the encoder and left null.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6118
/* Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): direct-mode motion vector buffer
 * addresses for the reference frames and the current frame, followed by the
 * POC list the hardware uses for B-slice direct-mode prediction.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference: one 64-bit address per
     * entry.  Only even indices are walked here -- presumably the odd
     * (second-field) slots are unused on this path; TODO confirm.  The last
     * two slots of res_direct_mv_buffersr are reserved for the current frame.
     */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* no MV buffer attached to this slot: program a NULL address */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory object control state for the reference MV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-35 is the MV address for the current frame (hardware writes
     * the reconstructed MVs there); DW36 is its memory object control state
     */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list (DW37-70): top-field POCs of the 32 reference entries plus the
     * two entries belonging to the current frame
     */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
6166
6167 static void
6168 gen9_mfc_qm_state(VADriverContextP ctx,
6169                   int qm_type,
6170                   const unsigned int *qm,
6171                   int qm_length,
6172                   struct intel_encoder_context *encoder_context)
6173 {
6174     struct intel_batchbuffer *batch = encoder_context->base.batch;
6175     unsigned int qm_buffer[16];
6176
6177     assert(qm_length <= 16);
6178     assert(sizeof(*qm) == 4);
6179     memset(qm_buffer, 0, 16 * 4);
6180     memcpy(qm_buffer, qm, qm_length * 4);
6181
6182     BEGIN_BCS_BATCH(batch, 18);
6183     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6184     OUT_BCS_BATCH(batch, qm_type << 0);
6185     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6186     ADVANCE_BCS_BATCH(batch);
6187 }
6188
6189 static void
6190 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6191                       struct encode_state *encode_state,
6192                       struct intel_encoder_context *encoder_context)
6193 {
6194     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6195     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6196     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6197     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6198
6199
6200     const unsigned int *qm_4x4_intra;
6201     const unsigned int *qm_4x4_inter;
6202     const unsigned int *qm_8x8_intra;
6203     const unsigned int *qm_8x8_inter;
6204
6205     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6206         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6207         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6208     } else {
6209         VAIQMatrixBufferH264 *qm;
6210         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6211         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6212         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6213         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6214         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6215         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6216     }
6217
6218     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6219     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6220     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6221     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
6222 }
6223
6224 static void
6225 gen9_mfc_fqm_state(VADriverContextP ctx,
6226                    int fqm_type,
6227                    const unsigned int *fqm,
6228                    int fqm_length,
6229                    struct intel_encoder_context *encoder_context)
6230 {
6231     struct intel_batchbuffer *batch = encoder_context->base.batch;
6232     unsigned int fqm_buffer[32];
6233
6234     assert(fqm_length <= 32);
6235     assert(sizeof(*fqm) == 4);
6236     memset(fqm_buffer, 0, 32 * 4);
6237     memcpy(fqm_buffer, fqm, fqm_length * 4);
6238
6239     BEGIN_BCS_BATCH(batch, 34);
6240     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6241     OUT_BCS_BATCH(batch, fqm_type << 0);
6242     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6243     ADVANCE_BCS_BATCH(batch);
6244 }
6245
/* Build a forward quantizer matrix from a quantizer matrix: fqm is the
 * transpose of qm with every entry replaced by its 16.16 fixed-point
 * reciprocal (65536 / value).  len is the matrix dimension (4 or 8).
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (col = 0; col < len; col++) {
        for (row = 0; row < len; row++) {
            uint8_t q = qm[col * len + row];

            /* a zero quantizer entry would divide by zero */
            assert(q);
            fqm[row * len + col] = 0x10000 / q;
        }
    }
}
6256
6257 static void
6258 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6259                        struct encode_state *encode_state,
6260                        struct intel_encoder_context *encoder_context)
6261 {
6262     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6263     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6264     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6265     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6266
6267     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6268         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6269         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6270         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6271         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6272         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6273     } else {
6274         int i;
6275         uint32_t fqm[32];
6276         VAIQMatrixBufferH264 *qm;
6277         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6278         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6279
6280         for (i = 0; i < 3; i++)
6281             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6282         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6283
6284         for (i = 3; i < 6; i++)
6285             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6286         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6287
6288         gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6289         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6290
6291         gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6292         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6293     }
6294 }
6295
6296 static void
6297 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6298                            struct intel_encoder_context *encoder_context,
6299                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6300                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6301                            int slice_header_indicator,
6302                            struct intel_batchbuffer *batch)
6303 {
6304     if (data_bits_in_last_dw == 0)
6305         data_bits_in_last_dw = 32;
6306
6307     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6308
6309     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6310     OUT_BCS_BATCH(batch,
6311                   (0 << 16) |   /* always start at offset 0 */
6312                   (slice_header_indicator << 14) |
6313                   (data_bits_in_last_dw << 8) |
6314                   (skip_emul_byte_count << 4) |
6315                   (!!emulation_flag << 3) |
6316                   ((!!is_last_header) << 2) |
6317                   ((!!is_end_of_slice) << 1) |
6318                   (0 << 0));    /* check this flag */
6319     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6320
6321     ADVANCE_BCS_BATCH(batch);
6322 }
6323
/* Scan the raw packed-data buffers attached to the first slice and, if an
 * AUD (access unit delimiter) NAL is among them, insert it into the
 * bitstream.  At most one AUD is inserted; the function returns after the
 * first match.
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    /* raw packed-data buffers attached to slice 0 */
    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* The NAL header byte is the last byte of the start-code prefix
         * region, at index skip_emul_byte_cnt - 1.  NOTE(review): this
         * assumes intel_avc_find_skipemulcnt() never returns 0 here,
         * otherwise it reads one byte before the buffer -- verify.
         */
        if ((*(nal_type+skip_emul_byte_cnt-1)&0x1f) == AVC_NAL_DELIMITER ) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
6367
/* Insert the packed data belonging to one slice.  All raw packed buffers
 * attached to the slice are inserted first (slice-header-typed buffers and
 * AUD NALs are skipped), then the slice header itself is inserted last:
 * either the application-supplied packed slice header, or a header built by
 * the driver when the application did not provide one.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* index of the application-supplied packed slice header; -1 when absent */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * AUD NALs were already handled by gen9_mfc_avc_insert_aud_packed_data()
         */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type+skip_emul_byte_cnt-1)&0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        /* build_avc_slice_header() allocates slice_header; freed below */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,  /* slice header indicator */
                                   batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,  /* slice header indicator */
                                   batch);
    }

    return;
}
6474
6475 static void
6476 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6477                            struct encode_state *encode_state,
6478                            struct intel_encoder_context *encoder_context,
6479                            VAEncSliceParameterBufferH264 *slice_param,
6480                            int slice_index,
6481                            struct intel_batchbuffer *batch)
6482 {
6483     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6484     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6485     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6486     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6487     unsigned int skip_emul_byte_cnt;
6488
6489     if (slice_index == 0) {
6490
6491         /* if AUD exist and insert it firstly */
6492         gen9_mfc_avc_insert_aud_packed_data(ctx,encode_state,encoder_context,batch);
6493
6494         if (encode_state->packed_header_data[idx]) {
6495             VAEncPackedHeaderParameterBuffer *param = NULL;
6496             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6497             unsigned int length_in_bits;
6498
6499             assert(encode_state->packed_header_param[idx]);
6500             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6501             length_in_bits = param->bit_length;
6502
6503             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6504             gen9_mfc_avc_insert_object(ctx,
6505                                        encoder_context,
6506                                        header_data,
6507                                        ALIGN(length_in_bits, 32) >> 5,
6508                                        length_in_bits & 0x1f,
6509                                        skip_emul_byte_cnt,
6510                                        0,
6511                                        0,
6512                                        !param->has_emulation_bytes,
6513                                        0,
6514                                        batch);
6515         }
6516
6517         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6518
6519         if (encode_state->packed_header_data[idx]) {
6520             VAEncPackedHeaderParameterBuffer *param = NULL;
6521             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6522             unsigned int length_in_bits;
6523
6524             assert(encode_state->packed_header_param[idx]);
6525             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6526             length_in_bits = param->bit_length;
6527
6528             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6529
6530             gen9_mfc_avc_insert_object(ctx,
6531                                        encoder_context,
6532                                        header_data,
6533                                        ALIGN(length_in_bits, 32) >> 5,
6534                                        length_in_bits & 0x1f,
6535                                        skip_emul_byte_cnt,
6536                                        0,
6537                                        0,
6538                                        !param->has_emulation_bytes,
6539                                        0,
6540                                        batch);
6541         }
6542
6543         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6544
6545         if (encode_state->packed_header_data[idx]) {
6546             VAEncPackedHeaderParameterBuffer *param = NULL;
6547             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6548             unsigned int length_in_bits;
6549
6550             assert(encode_state->packed_header_param[idx]);
6551             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6552             length_in_bits = param->bit_length;
6553
6554             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6555             gen9_mfc_avc_insert_object(ctx,
6556                                        encoder_context,
6557                                        header_data,
6558                                        ALIGN(length_in_bits, 32) >> 5,
6559                                        length_in_bits & 0x1f,
6560                                        skip_emul_byte_cnt,
6561                                        0,
6562                                        0,
6563                                        !param->has_emulation_bytes,
6564                                        0,
6565                                        batch);
6566         } else if (internal_rate_mode == VA_RC_CBR) {
6567             /* insert others */
6568         }
6569     }
6570
6571     gen9_mfc_avc_insert_slice_packed_data(ctx,
6572                                           encode_state,
6573                                           encoder_context,
6574                                           slice_index,
6575                                           batch);
6576 }
6577
/* Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, weighted-prediction denominators, QP and deblocking
 * parameters, slice geometry, and the PAK rate-control / rounding controls.
 * next_slice_param being NULL marks this as the last slice of the frame.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* slice start position in MB units */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" position is the frame bottom */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        /* the slice-level override takes precedence over the PPS counts */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* BRC QP bounds and growth/shrink/correction controls are programmed to
     * zero here; presumably the multi-pass BRC adjusts behavior through the
     * conditional batch instead -- TODO confirm.
     */
    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* keep the RC counter across PAK passes after the first one */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* panic mode only on the final PAK pass, and only when BRC owns the QP */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: current and next slice start positions */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of this slice's output in the compressed bitstream */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
6726
6727 static uint8_t
6728 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6729 {
6730     unsigned int is_long_term =
6731         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6732     unsigned int is_top_field =
6733         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6734     unsigned int is_bottom_field =
6735         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6736
6737     return ((is_long_term                         << 6) |
6738             (0 << 5) |
6739             (frame_store_id                       << 1) |
6740             ((is_top_field ^ 1) & is_bottom_field));
6741 }
6742
6743 static void
6744 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6745                            struct encode_state *encode_state,
6746                            struct intel_encoder_context *encoder_context,
6747                            VAEncSliceParameterBufferH264 *slice_param,
6748                            struct intel_batchbuffer *batch)
6749 {
6750     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6751     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6752     VAPictureH264 *ref_pic;
6753     int i, slice_type, ref_idx_shift;
6754     unsigned int fwd_ref_entry;
6755     unsigned int bwd_ref_entry;
6756
6757     /* max 4 ref frames are allowed for l0 and l1 */
6758     fwd_ref_entry = 0x80808080;
6759     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6760
6761     if ((slice_type == SLICE_TYPE_P) ||
6762         (slice_type == SLICE_TYPE_B)) {
6763         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6764             ref_pic = &slice_param->RefPicList0[i];
6765             ref_idx_shift = i * 8;
6766
6767             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6768             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6769         }
6770     }
6771
6772     bwd_ref_entry = 0x80808080;
6773     if (slice_type == SLICE_TYPE_B) {
6774         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6775             ref_pic = &slice_param->RefPicList1[i];
6776             ref_idx_shift = i * 8;
6777
6778             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6779             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
6780         }
6781     }
6782
6783     if ((slice_type == SLICE_TYPE_P) ||
6784         (slice_type == SLICE_TYPE_B)) {
6785         BEGIN_BCS_BATCH(batch, 10);
6786         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6787         OUT_BCS_BATCH(batch, 0);                        // L0
6788         OUT_BCS_BATCH(batch, fwd_ref_entry);
6789
6790         for (i = 0; i < 7; i++) {
6791             OUT_BCS_BATCH(batch, 0x80808080);
6792         }
6793
6794         ADVANCE_BCS_BATCH(batch);
6795     }
6796
6797     if (slice_type == SLICE_TYPE_B) {
6798         BEGIN_BCS_BATCH(batch, 10);
6799         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6800         OUT_BCS_BATCH(batch, 1);                  //Select L1
6801         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
6802         for (i = 0; i < 7; i++) {
6803             OUT_BCS_BATCH(batch, 0x80808080);
6804         }
6805         ADVANCE_BCS_BATCH(batch);
6806     }
6807 }
6808
6809 static void
6810 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6811                                 struct encode_state *encode_state,
6812                                 struct intel_encoder_context *encoder_context,
6813                                 VAEncPictureParameterBufferH264 *pic_param,
6814                                 VAEncSliceParameterBufferH264 *slice_param,
6815                                 struct intel_batchbuffer *batch)
6816 {
6817     int i, slice_type;
6818     short weightoffsets[32 * 6];
6819
6820     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6821
6822     if (slice_type == SLICE_TYPE_P &&
6823         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6824         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6825         for (i = 0; i < 32; i++) {
6826             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6827             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6828             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6829             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6830             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6831             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6832         }
6833
6834         BEGIN_BCS_BATCH(batch, 98);
6835         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6836         OUT_BCS_BATCH(batch, 0);
6837         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6838
6839         ADVANCE_BCS_BATCH(batch);
6840     }
6841
6842     if (slice_type == SLICE_TYPE_B &&
6843         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6844         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6845         for (i = 0; i < 32; i++) {
6846             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6847             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6848             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6849             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6850             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6851             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6852         }
6853
6854         BEGIN_BCS_BATCH(batch, 98);
6855         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6856         OUT_BCS_BATCH(batch, 0);
6857         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6858         ADVANCE_BCS_BATCH(batch);
6859
6860         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6861         for (i = 0; i < 32; i++) {
6862             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6863             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6864             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6865             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6866             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6867             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6868         }
6869
6870         BEGIN_BCS_BATCH(batch, 98);
6871         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6872         OUT_BCS_BATCH(batch, 1);
6873         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6874         ADVANCE_BCS_BATCH(batch);
6875     }
6876 }
6877
/*
 * Program one slice for the PAK stage.  On the first PAK pass the per-slice
 * commands (ref idx, weight/offset, slice state, packed headers) are written
 * into a reusable second-level batch buffer and the start offset is recorded;
 * later BRC re-encode passes replay the same commands from the recorded
 * offset instead of rebuilding them.  The slice's MB code (VME output) is
 * then chained in as a second second-level batch.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* first pass: build the slice command sequence and remember where
         * it starts so subsequent passes can replay it */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* terminate this slice's second-level sequence */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* re-encode pass: reuse the commands recorded on pass 0 */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    /* offset into the MB code surface: 16 DWs (64 bytes) per MB,
     * starting at this slice's first macroblock */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
6952
/*
 * PAK slice-level programming: walk all slice parameter buffers and emit
 * each slice via gen9_mfc_avc_single_slice(), then flush the video
 * pipeline.  With is_frame_level set (the current single-slice-per-frame
 * mode), only the first slice is programmed and both loops break early.
 */
static void
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int is_frame_level = 1;       /* check it for SKL,now single slice per frame */
    int has_tail = 0;             /* check it later */

    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        /* the first slice of the next group (or NULL for the last group)
         * serves as the "next slice" of this group's final slice */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_mfc_avc_single_slice(ctx,
                                      encode_state,
                                      encoder_context,
                                      slice_param,
                                      next_slice_param,
                                      slice_index);
            slice_param++;
            slice_index++;

            /* frame-level mode: only the first slice is submitted;
             * the multi-slice path is not implemented yet */
            if (is_frame_level)
                break;
            else {
                /* remove assert(0) and add other commands here */
                assert(0);
            }
        }

        if (is_frame_level)
            break;
    }

    if (has_tail) {
        /* insert a tail if required */
    }

    /* flush and invalidate the video pipeline cache after all slices */
    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
/*
 * PAK picture-level programming: emit the MFX pipe/state commands for the
 * current PAK pass.  For BRC re-encode passes a conditional batch-buffer-end
 * first checks the image status mask so the pass is skipped when the
 * previous pass already produced an acceptable frame.  The image state is
 * either replayed from the BRC-written buffer (BRC on) or freshly generated
 * (CQP/non-BRC).
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* skip this re-encode pass if the image status mask indicates the
         * previous pass already succeeded */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* MFX pipe setup: mode select, recon (binding 0) and raw input
     * (binding 4) surfaces, then the buffer address states */
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* replay the image state the BRC kernel wrote for this pass;
         * each pass occupies one INTEL_AVC_IMAGE_STATE_CMD_SIZE slot */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    /* quantization matrices and direct-mode buffers */
    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
7071
/*
 * Capture MFX status registers after PAK: store the bitstream byte count
 * and image status mask into the internal status buffer, and mirror the
 * byte counts, pass counter and per-pass image status control into the
 * BRC pre-PAK statistics buffer that the BRC kernels read.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    /* make sure PAK writes have landed before sampling the registers */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* DW0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW1: byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW2: number of PAK passes executed so far */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* DW4+pass: image status control for this pass */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
7131
7132 static void
7133 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7134                          struct intel_encoder_context *encoder_context)
7135 {
7136     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7137     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7138     unsigned int rate_control_mode = encoder_context->rate_control_mode;
7139
7140     switch (rate_control_mode & 0x7f) {
7141     case VA_RC_CBR:
7142         generic_state->internal_rate_mode = VA_RC_CBR;
7143         break;
7144
7145     case VA_RC_VBR:
7146         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7147         break;
7148
7149     case VA_RC_CQP:
7150     default:
7151         generic_state->internal_rate_mode = VA_RC_CQP;
7152         break;
7153     }
7154
7155     if (encoder_context->quality_level == 0)
7156         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
7157 }
7158
/*
 * Prepare all surfaces and buffers required by the PAK stage for the
 * current frame: scan slice params to decide whether the in-loop deblocker
 * output path is needed, bind the reconstructed/raw/reference surfaces and
 * their DMV buffers as GPE resources, (re)create the second-level slice
 * batch buffer, and (re)allocate the PAK row-store and MB status scratch
 * buffers sized from the frame's MB dimensions.
 *
 * Returns VA_STATUS_SUCCESS, or an allocation/check error status.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    struct gen9_surface_avc *avc_priv_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;
    struct avc_surface_param surface_param;

    /* update the parameter and check slice parameter */
    /* deblocking is needed as soon as any slice does not fully disable it
     * (disable_deblocking_filter_idc != 1) */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
        /* the current frame occupies the last two DMV buffer slots
         * (top/bottom); rebind them to this surface's DMV BOs */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    /* the recon surface doubles as the post- or pre-deblocking output,
     * depending on whether the in-loop deblocker is active */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* NOTE(review): the loop stops at the first empty reference slot —
     * presumably the list is packed from index 0; verify against callers */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* rebuild the second-level slice batch buffer, sized per slice group */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* PAK scratch buffers, sized from the frame width/height in MBs */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
7346
7347 static VAStatus
7348 gen9_avc_encode_picture(VADriverContextP ctx,
7349                         VAProfile profile,
7350                         struct encode_state *encode_state,
7351                         struct intel_encoder_context *encoder_context)
7352 {
7353     VAStatus va_status;
7354     struct i965_driver_data *i965 = i965_driver_data(ctx);
7355     struct i965_gpe_table *gpe = &i965->gpe_table;
7356     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7357     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7358     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7359     struct intel_batchbuffer *batch = encoder_context->base.batch;
7360
7361     va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);
7362
7363     if (va_status != VA_STATUS_SUCCESS)
7364         return va_status;
7365
7366     if (i965->intel.has_bsd2)
7367         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
7368     else
7369         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
7370     intel_batchbuffer_emit_mi_flush(batch);
7371
7372     for (generic_state->curr_pak_pass = 0;
7373          generic_state->curr_pak_pass < generic_state->num_pak_passes;
7374          generic_state->curr_pak_pass++) {
7375
7376         if (generic_state->curr_pak_pass == 0) {
7377             /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
7378             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
7379             struct encoder_status_buffer_internal *status_buffer;
7380
7381             status_buffer = &(avc_ctx->status_buffer);
7382             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
7383             mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7384             mi_load_reg_imm.data = 0;
7385             gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
7386         }
7387         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
7388         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
7389         gen9_avc_read_mfc_status(ctx, encoder_context);
7390
7391     }
7392
7393     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7394         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7395         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7396     }
7397
7398     intel_batchbuffer_end_atomic(batch);
7399     intel_batchbuffer_flush(batch);
7400
7401     generic_state->seq_frame_number++;
7402     generic_state->total_frame_number++;
7403     generic_state->first_frame = 0;
7404     return VA_STATUS_SUCCESS;
7405 }
7406
7407 static VAStatus
7408 gen9_avc_pak_pipeline(VADriverContextP ctx,
7409                       VAProfile profile,
7410                       struct encode_state *encode_state,
7411                       struct intel_encoder_context *encoder_context)
7412 {
7413     VAStatus vaStatus;
7414
7415     switch (profile) {
7416     case VAProfileH264ConstrainedBaseline:
7417     case VAProfileH264Main:
7418     case VAProfileH264High:
7419     case VAProfileH264MultiviewHigh:
7420     case VAProfileH264StereoHigh:
7421         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7422         break;
7423
7424     default:
7425         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
7426         break;
7427     }
7428
7429     return vaStatus;
7430 }
7431
7432 static void
7433 gen9_avc_pak_context_destroy(void * context)
7434 {
7435     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7436     struct generic_encoder_context * generic_ctx;
7437     struct i965_avc_encoder_context * avc_ctx;
7438     int i = 0;
7439
7440     if (!pak_context)
7441         return;
7442
7443     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7444     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7445
7446     // other things
7447     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7448     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7449     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7450     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7451
7452     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7453     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7454     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7455     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7456     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7457
7458     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7459         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7460     }
7461
7462     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7463         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7464     }
7465
7466     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7467         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7468         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7469     }
7470
7471 }
7472
7473 static VAStatus
7474 gen9_avc_get_coded_status(VADriverContextP ctx,
7475                           struct intel_encoder_context *encoder_context,
7476                           struct i965_coded_buffer_segment *coded_buf_seg)
7477 {
7478     struct encoder_status *avc_encode_status;
7479
7480     if (!encoder_context || !coded_buf_seg)
7481         return VA_STATUS_ERROR_INVALID_BUFFER;
7482
7483     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7484     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7485
7486     return VA_STATUS_SUCCESS;
7487 }
7488
7489 Bool
7490 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7491 {
7492     /* VME & PAK share the same context */
7493     struct i965_driver_data *i965 = i965_driver_data(ctx);
7494     struct encoder_vme_mfc_context * vme_context = NULL;
7495     struct generic_encoder_context * generic_ctx = NULL;
7496     struct i965_avc_encoder_context * avc_ctx = NULL;
7497     struct generic_enc_codec_state * generic_state = NULL;
7498     struct avc_enc_state * avc_state = NULL;
7499     struct encoder_status_buffer_internal *status_buffer;
7500     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7501
7502     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7503     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7504     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7505     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7506     avc_state = calloc(1, sizeof(struct avc_enc_state));
7507
7508     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7509         goto allocate_structure_failed;
7510
7511     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7512     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7513     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7514     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7515     memset(avc_state, 0, sizeof(struct avc_enc_state));
7516
7517     encoder_context->vme_context = vme_context;
7518     vme_context->generic_enc_ctx = generic_ctx;
7519     vme_context->private_enc_ctx = avc_ctx;
7520     vme_context->generic_enc_state = generic_state;
7521     vme_context->private_enc_state = avc_state;
7522
7523     if (IS_SKL(i965->intel.device_info) ||
7524         IS_BXT(i965->intel.device_info)) {
7525         generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7526         generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7527     } else if (IS_KBL(i965->intel.device_info) ||
7528                IS_GLK(i965->intel.device_info)) {
7529         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7530         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7531     } else
7532         goto allocate_structure_failed;
7533
7534     /* initialize misc ? */
7535     avc_ctx->ctx = ctx;
7536     generic_ctx->use_hw_scoreboard = 1;
7537     generic_ctx->use_hw_non_stalling_scoreboard = 1;
7538
7539     /* initialize generic state */
7540
7541     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7542     generic_state->preset = INTEL_PRESET_RT_SPEED;
7543     generic_state->seq_frame_number = 0;
7544     generic_state->total_frame_number = 0;
7545     generic_state->frame_type = 0;
7546     generic_state->first_frame = 1;
7547
7548     generic_state->frame_width_in_pixel = 0;
7549     generic_state->frame_height_in_pixel = 0;
7550     generic_state->frame_width_in_mbs = 0;
7551     generic_state->frame_height_in_mbs = 0;
7552     generic_state->frame_width_4x = 0;
7553     generic_state->frame_height_4x = 0;
7554     generic_state->frame_width_16x = 0;
7555     generic_state->frame_height_16x = 0;
7556     generic_state->frame_width_32x = 0;
7557     generic_state->downscaled_width_4x_in_mb = 0;
7558     generic_state->downscaled_height_4x_in_mb = 0;
7559     generic_state->downscaled_width_16x_in_mb = 0;
7560     generic_state->downscaled_height_16x_in_mb = 0;
7561     generic_state->downscaled_width_32x_in_mb = 0;
7562     generic_state->downscaled_height_32x_in_mb = 0;
7563
7564     generic_state->hme_supported = 1;
7565     generic_state->b16xme_supported = 1;
7566     generic_state->b32xme_supported = 0;
7567     generic_state->hme_enabled = 0;
7568     generic_state->b16xme_enabled = 0;
7569     generic_state->b32xme_enabled = 0;
7570     generic_state->brc_distortion_buffer_supported = 1;
7571     generic_state->brc_constant_buffer_supported = 0;
7572
7573
7574     generic_state->frame_rate = 30;
7575     generic_state->brc_allocated = 0;
7576     generic_state->brc_inited = 0;
7577     generic_state->brc_need_reset = 0;
7578     generic_state->is_low_delay = 0;
7579     generic_state->brc_enabled = 0;//default
7580     generic_state->internal_rate_mode = 0;
7581     generic_state->curr_pak_pass = 0;
7582     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7583     generic_state->is_first_pass = 1;
7584     generic_state->is_last_pass = 0;
7585     generic_state->mb_brc_enabled = 0; // enable mb brc
7586     generic_state->brc_roi_enable = 0;
7587     generic_state->brc_dirty_roi_enable = 0;
7588     generic_state->skip_frame_enbale = 0;
7589
7590     generic_state->target_bit_rate = 0;
7591     generic_state->max_bit_rate = 0;
7592     generic_state->min_bit_rate = 0;
7593     generic_state->init_vbv_buffer_fullness_in_bit = 0;
7594     generic_state->vbv_buffer_size_in_bit = 0;
7595     generic_state->frames_per_100s = 0;
7596     generic_state->gop_size = 0;
7597     generic_state->gop_ref_distance = 0;
7598     generic_state->brc_target_size = 0;
7599     generic_state->brc_mode = 0;
7600     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7601     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7602     generic_state->brc_init_reset_buf_size_in_bits = 0;
7603     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7604     generic_state->frames_per_window_size = 0;//default
7605     generic_state->target_percentage = 0;
7606
7607     generic_state->avbr_curracy = 0;
7608     generic_state->avbr_convergence = 0;
7609
7610     generic_state->num_skip_frames = 0;
7611     generic_state->size_skip_frames = 0;
7612
7613     generic_state->num_roi = 0;
7614     generic_state->max_delta_qp = 0;
7615     generic_state->min_delta_qp = 0;
7616
7617     if (encoder_context->rate_control_mode != VA_RC_NONE &&
7618         encoder_context->rate_control_mode != VA_RC_CQP) {
7619         generic_state->brc_enabled = 1;
7620         generic_state->brc_distortion_buffer_supported = 1;
7621         generic_state->brc_constant_buffer_supported = 1;
7622         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7623     }
7624     /*avc state initialization */
7625     avc_state->mad_enable = 0;
7626     avc_state->mb_disable_skip_map_enable = 0;
7627     avc_state->sfd_enable = 1;//default
7628     avc_state->sfd_mb_enable = 1;//set it true
7629     avc_state->adaptive_search_window_enable = 1;//default
7630     avc_state->mb_qp_data_enable = 0;
7631     avc_state->intra_refresh_i_enable = 0;
7632     avc_state->min_max_qp_enable = 0;
7633     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
7634
7635     //external input
7636     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7637     avc_state->ftq_skip_threshold_lut_input_enable = 0;
7638     avc_state->ftq_override = 0;
7639
7640     avc_state->direct_bias_adjustment_enable = 0;
7641     avc_state->global_motion_bias_adjustment_enable = 0;
7642     avc_state->disable_sub_mb_partion = 0;
7643     avc_state->arbitrary_num_mbs_in_slice = 0;
7644     avc_state->adaptive_transform_decision_enable = 0;//default
7645     avc_state->skip_check_disable = 0;
7646     avc_state->tq_enable = 0;
7647     avc_state->enable_avc_ildb = 0;
7648     avc_state->mbaff_flag = 0;
7649     avc_state->enable_force_skip = 1;//default
7650     avc_state->rc_panic_enable = 1;//default
7651     avc_state->suppress_recon_enable = 1;//default
7652
7653     avc_state->ref_pic_select_list_supported = 1;
7654     avc_state->mb_brc_supported = 1;//?,default
7655     avc_state->multi_pre_enable = 1;//default
7656     avc_state->ftq_enable = 1;//default
7657     avc_state->caf_supported = 1; //default
7658     avc_state->caf_enable = 0;
7659     avc_state->caf_disable_hd = 1;//default
7660     avc_state->skip_bias_adjustment_supported = 1;//default
7661
7662     avc_state->adaptive_intra_scaling_enable = 1;//default
7663     avc_state->old_mode_cost_enable = 0;//default
7664     avc_state->multi_ref_qp_enable = 1;//default
7665     avc_state->weighted_ref_l0_enable = 1;//default
7666     avc_state->weighted_ref_l1_enable = 1;//default
7667     avc_state->weighted_prediction_supported = 0;
7668     avc_state->brc_split_enable = 0;
7669     avc_state->slice_level_report_supported = 0;
7670
7671     avc_state->fbr_bypass_enable = 1;//default
7672     avc_state->field_scaling_output_interleaved = 0;
7673     avc_state->mb_variance_output_enable = 0;
7674     avc_state->mb_pixel_average_output_enable = 0;
7675     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7676     avc_state->mbenc_curbe_set_in_brc_update = 0;
7677     avc_state->rounding_inter_enable = 1; //default
7678     avc_state->adaptive_rounding_inter_enable = 1;//default
7679
7680     avc_state->mbenc_i_frame_dist_in_use = 0;
7681     avc_state->mb_status_supported = 1; //set in intialization for gen9
7682     avc_state->mb_status_enable = 0;
7683     avc_state->mb_vproc_stats_enable = 0;
7684     avc_state->flatness_check_enable = 0;
7685     avc_state->flatness_check_supported = 1;//default
7686     avc_state->block_based_skip_enable = 0;
7687     avc_state->use_widi_mbenc_kernel = 0;
7688     avc_state->kernel_trellis_enable = 0;
7689     avc_state->generic_reserved = 0;
7690
7691     avc_state->rounding_value = 0;
7692     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7693     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7694     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7695     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7696     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7697     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7698     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7699     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7700     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7701
7702     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7703     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7704     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7705
7706     avc_state->intra_refresh_qp_threshold = 0;
7707     avc_state->trellis_flag = 0;
7708     avc_state->hme_mv_cost_scaling_factor = 0;
7709     avc_state->slice_height = 1;
7710     avc_state->slice_num = 1;
7711     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7712     avc_state->bi_weight = 0;
7713
7714     avc_state->lambda_table_enable = 0;
7715
7716
7717     if (IS_SKL(i965->intel.device_info) ||
7718         IS_BXT(i965->intel.device_info)) {
7719         avc_state->brc_const_data_surface_width = 64;
7720         avc_state->brc_const_data_surface_height = 44;
7721     } else if (IS_KBL(i965->intel.device_info) ||
7722                IS_GLK(i965->intel.device_info)) {
7723         avc_state->brc_const_data_surface_width = 64;
7724         avc_state->brc_const_data_surface_height = 53;
7725         //gen95
7726         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7727         avc_state->extended_mv_cost_range_enable = 0;
7728         avc_state->reserved_g95 = 0;
7729         avc_state->mbenc_brc_buffer_size = 128;
7730         avc_state->kernel_trellis_enable = 1;
7731         avc_state->lambda_table_enable = 1;
7732         avc_state->brc_split_enable = 1;
7733     }
7734
7735     avc_state->num_refs[0] = 0;
7736     avc_state->num_refs[1] = 0;
7737     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7738     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7739     avc_state->tq_rounding = 0;
7740     avc_state->zero_mv_threshold = 0;
7741     avc_state->slice_second_levle_batch_buffer_in_use = 0;
7742
7743     //1. seq/pic/slice
7744
7745     /* the definition of status buffer offset for Encoder */
7746
7747     status_buffer = &avc_ctx->status_buffer;
7748     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
7749
7750     status_buffer->base_offset = base_offset;
7751     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7752     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7753     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7754     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7755     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7756     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
7757
7758     status_buffer->status_buffer_size = sizeof(struct encoder_status);
7759     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7760     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7761     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7762     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7763     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
7764
7765     gen9_avc_kernel_init(ctx, encoder_context);
7766     encoder_context->vme_context = vme_context;
7767     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7768     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
7769
7770     return true;
7771
7772 allocate_structure_failed:
7773
7774     free(vme_context);
7775     free(generic_ctx);
7776     free(avc_ctx);
7777     free(generic_state);
7778     free(avc_state);
7779     return false;
7780 }
7781
7782 Bool
7783 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7784 {
7785     /* VME & PAK share the same context */
7786     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7787
7788     if (!pak_context)
7789         return false;
7790
7791     encoder_context->mfc_context = pak_context;
7792     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7793     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7794     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7795     encoder_context->get_status = gen9_avc_get_coded_status;
7796     return true;
7797 }