OSDN Git Service

a6dba78c9ac5d783d1c35c10f093e0b5c358cc84
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
1 /*
2  * Copyright @ 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
46
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
53
54 #define MAX_URB_SIZE                    4096 /* In register */
55 #define NUM_KERNELS_PER_GPE_CONTEXT     1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 = 1 << 4) */
58
59 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
60         if (bo) {                                                       \
61             OUT_BCS_RELOC64(batch,                                        \
62                             bo,                                         \
63                             I915_GEM_DOMAIN_INSTRUCTION,                \
64                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
65                             delta);                                     \
66         } else {                                                        \
67             OUT_BCS_BATCH(batch, 0);                                    \
68             OUT_BCS_BATCH(batch, 0);                                    \
69         }                                                               \
70     } while (0)
71
72 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
73         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
74         OUT_BCS_BATCH(batch, attr);                             \
75     } while (0)
76
77 static const uint32_t qm_flat[16] = {
78     0x10101010, 0x10101010, 0x10101010, 0x10101010,
79     0x10101010, 0x10101010, 0x10101010, 0x10101010,
80     0x10101010, 0x10101010, 0x10101010, 0x10101010,
81     0x10101010, 0x10101010, 0x10101010, 0x10101010
82 };
83
84 static const uint32_t fqm_flat[32] = {
85     0x10001000, 0x10001000, 0x10001000, 0x10001000,
86     0x10001000, 0x10001000, 0x10001000, 0x10001000,
87     0x10001000, 0x10001000, 0x10001000, 0x10001000,
88     0x10001000, 0x10001000, 0x10001000, 0x10001000,
89     0x10001000, 0x10001000, 0x10001000, 0x10001000,
90     0x10001000, 0x10001000, 0x10001000, 0x10001000,
91     0x10001000, 0x10001000, 0x10001000, 0x10001000,
92     0x10001000, 0x10001000, 0x10001000, 0x10001000
93 };
94
95 static const unsigned int slice_type_kernel[3] = {1,2,0};
96
97 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data =
98 {
99     // unsigned int 0
100     {
101             0
102     },
103
104     // unsigned int 1
105     {
106             0
107     },
108
109     // unsigned int 2
110     {
111             0
112     },
113
114     // unsigned int 3
115     {
116             0
117     },
118
119     // unsigned int 4
120     {
121             0
122     },
123
124     // unsigned int 5
125     {
126             0
127     },
128
129     // unsigned int 6
130     {
131             0
132     },
133
134     // unsigned int 7
135     {
136             0
137     },
138
139     // unsigned int 8
140     {
141             0,
142             0
143     },
144
145     // unsigned int 9
146     {
147             0,
148             0
149     },
150
151     // unsigned int 10
152     {
153             0,
154             0
155     },
156
157     // unsigned int 11
158     {
159             0,
160             1
161     },
162
163     // unsigned int 12
164     {
165             51,
166             0
167     },
168
169     // unsigned int 13
170     {
171             40,
172             60,
173             80,
174             120
175     },
176
177     // unsigned int 14
178     {
179             35,
180             60,
181             80,
182             120
183     },
184
185     // unsigned int 15
186     {
187             40,
188             60,
189             90,
190             115
191     },
192
193     // unsigned int 16
194     {
195             0,
196             0,
197             0,
198             0
199     },
200
201     // unsigned int 17
202     {
203             0,
204             0,
205             0,
206             0
207     },
208
209     // unsigned int 18
210     {
211             0,
212             0,
213             0,
214             0
215     },
216
217     // unsigned int 19
218     {
219             0,
220             0,
221             0,
222             0
223     },
224
225     // unsigned int 20
226     {
227             0,
228             0,
229             0,
230             0
231     },
232
233     // unsigned int 21
234     {
235             0,
236             0,
237             0,
238             0
239     },
240
241     // unsigned int 22
242     {
243             0,
244             0,
245             0,
246             0
247     },
248
249     // unsigned int 23
250     {
251             0
252     }
253 };
254
255 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data =
256 {
257     // unsigned int 0
258     {
259             0
260     },
261
262     // unsigned int 1
263     {
264             0
265     },
266
267     // unsigned int 2
268     {
269             0
270     },
271
272     // unsigned int 3
273     {
274             10,
275             50
276     },
277
278     // unsigned int 4
279     {
280             100,
281             150
282     },
283
284     // unsigned int 5
285     {
286             0,
287             0,
288             0,
289             0
290     },
291
292     // unsigned int 6
293     {
294             0,
295             0,
296             0,
297             0,
298             0,
299             0
300     },
301
302     // unsigned int 7
303     {
304             0
305     },
306
307     // unsigned int 8
308     {
309             1,
310             1,
311             3,
312             2
313     },
314
315     // unsigned int 9
316     {
317             1,
318             40,
319             5,
320             5
321     },
322
323     // unsigned int 10
324     {
325             3,
326             1,
327             7,
328             18
329     },
330
331     // unsigned int 11
332     {
333             25,
334             37,
335             40,
336             75
337     },
338
339     // unsigned int 12
340     {
341             97,
342             103,
343             125,
344             160
345     },
346
347     // unsigned int 13
348     {
349             -3,
350             -2,
351             -1,
352             0
353     },
354
355     // unsigned int 14
356     {
357             1,
358             2,
359             3,
360             0xff
361     },
362
363     // unsigned int 15
364     {
365             0,
366             0,
367             0,
368             0
369     },
370
371     // unsigned int 16
372     {
373             0
374     },
375
376     // unsigned int 17
377     {
378             0
379     },
380
381     // unsigned int 18
382     {
383             0
384     },
385
386     // unsigned int 19
387     {
388             0
389     },
390
391     // unsigned int 20
392     {
393             0
394     },
395
396     // unsigned int 21
397     {
398             0
399     },
400
401     // unsigned int 22
402     {
403             0
404     },
405
406     // unsigned int 23
407     {
408             0
409     },
410
411 };
412
413 static void
414 gen9_avc_update_misc_parameters(VADriverContextP ctx,
415                                 struct encode_state *encode_state,
416                                 struct intel_encoder_context *encoder_context)
417 {
418     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
419     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
420     int i;
421
422     /* brc */
423     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
424
425     generic_state->brc_need_reset = encoder_context->brc.need_reset;
426
427     if (generic_state->internal_rate_mode == VA_RC_CBR) {
428         generic_state->min_bit_rate = generic_state->max_bit_rate;
429         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
430
431         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
432             generic_state->target_bit_rate = generic_state->max_bit_rate;
433             generic_state->brc_need_reset = 1;
434         }
435     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
436         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
437         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
438
439         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
440             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
441             generic_state->brc_need_reset = 1;
442         }
443     }
444
445     /*  frame rate */
446     if (generic_state->internal_rate_mode != VA_RC_CQP)
447     {
448         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100/encoder_context->brc.framerate[0].den ;
449         generic_state->frame_rate = encoder_context->brc.framerate[0].num/encoder_context->brc.framerate[0].den ;
450         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate /1000);// brc.windows size in ms as the unit
451     }else
452     {
453         generic_state->frames_per_100s = 30 * 100;
454         generic_state->frame_rate = 30 ;
455         generic_state->frames_per_window_size = 30;
456     }
457
458     /*  HRD */
459     if (generic_state->internal_rate_mode != VA_RC_CQP)
460     {
461         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
462         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
463     }
464
465     /* ROI */
466     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
467     if (generic_state->num_roi > 0) {
468         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
469         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
470
471         for (i = 0; i < generic_state->num_roi; i++) {
472             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
473             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
474             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
475             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
476             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
477
478             generic_state->roi[i].left /= 16;
479             generic_state->roi[i].right /= 16;
480             generic_state->roi[i].top /= 16;
481             generic_state->roi[i].bottom /= 16;
482         }
483     }
484
485 }
486
487 static bool
488 intel_avc_get_kernel_header_and_size(void *pvbinary,
489                                      int binary_size,
490                                      INTEL_GENERIC_ENC_OPERATION operation,
491                                      int krnstate_idx,
492                                      struct i965_kernel *ret_kernel)
493 {
494     typedef uint32_t BIN_PTR[4];
495
496     char *bin_start;
497     gen9_avc_encoder_kernel_header      *pkh_table;
498     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
499     int next_krnoffset;
500
501     if (!pvbinary || !ret_kernel)
502         return false;
503
504     bin_start = (char *)pvbinary;
505     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
506     pinvalid_entry = &(pkh_table->static_detection) + 1;
507     next_krnoffset = binary_size;
508
509     if (operation == INTEL_GENERIC_ENC_SCALING4X)
510     {
511         pcurr_header = &pkh_table->ply_dscale_ply;
512     }
513     else if (operation == INTEL_GENERIC_ENC_SCALING2X)
514     {
515         pcurr_header = &pkh_table->ply_2xdscale_ply;
516     }
517     else if (operation == INTEL_GENERIC_ENC_ME)
518     {
519         pcurr_header = &pkh_table->me_p;
520     }
521     else if (operation == INTEL_GENERIC_ENC_BRC)
522     {
523         pcurr_header = &pkh_table->frame_brc_init;
524     }
525     else if (operation == INTEL_GENERIC_ENC_MBENC)
526     {
527         pcurr_header = &pkh_table->mbenc_quality_I;
528     }
529     else if (operation == INTEL_GENERIC_ENC_WP)
530     {
531         pcurr_header = &pkh_table->wp;
532     }
533     else if (operation == INTEL_GENERIC_ENC_SFD)
534     {
535         pcurr_header = &pkh_table->static_detection;
536     }
537     else
538     {
539         return false;
540     }
541
542     pcurr_header += krnstate_idx;
543     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
544
545     pnext_header = (pcurr_header + 1);
546     if (pnext_header < pinvalid_entry)
547     {
548         next_krnoffset = pnext_header->kernel_start_pointer << 6;
549     }
550     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
551
552     return true;
553 }
554 static void
555 gen9_free_surfaces_avc(void **data)
556 {
557     struct gen9_surface_avc *avc_surface;
558
559     if (!data || !*data)
560         return;
561
562     avc_surface = *data;
563
564     if (avc_surface->scaled_4x_surface_obj) {
565         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
566         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
567         avc_surface->scaled_4x_surface_obj = NULL;
568     }
569
570     if (avc_surface->scaled_16x_surface_obj) {
571         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
572         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
573         avc_surface->scaled_16x_surface_obj = NULL;
574     }
575
576     if (avc_surface->scaled_32x_surface_obj) {
577         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
578         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
579         avc_surface->scaled_32x_surface_obj = NULL;
580     }
581
582     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
583     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
584     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
585
586     dri_bo_unreference(avc_surface->dmv_top);
587     avc_surface->dmv_top = NULL;
588     dri_bo_unreference(avc_surface->dmv_bottom);
589     avc_surface->dmv_bottom = NULL;
590
591     free(avc_surface);
592
593     *data = NULL;
594
595     return;
596 }
597
598 static VAStatus
599 gen9_avc_init_check_surfaces(VADriverContextP ctx,
600                              struct object_surface *obj_surface,
601                              struct intel_encoder_context *encoder_context,
602                              struct avc_surface_param *surface_param)
603 {
604     struct i965_driver_data *i965 = i965_driver_data(ctx);
605     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
606     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
607     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
608
609     struct gen9_surface_avc *avc_surface;
610     int downscaled_width_4x, downscaled_height_4x;
611     int downscaled_width_16x, downscaled_height_16x;
612     int downscaled_width_32x, downscaled_height_32x;
613     int size = 0;
614     unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width,16) / 16;
615     unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height,16) / 16;
616     unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
617     int allocate_flag = 1;
618     int width,height;
619
620     if (!obj_surface || !obj_surface->bo)
621         return VA_STATUS_ERROR_INVALID_SURFACE;
622
623     if (obj_surface->private_data) {
624         return VA_STATUS_SUCCESS;
625     }
626
627     avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
628
629     if (!avc_surface)
630         return VA_STATUS_ERROR_ALLOCATION_FAILED;
631
632     avc_surface->ctx = ctx;
633     obj_surface->private_data = avc_surface;
634     obj_surface->free_private_data = gen9_free_surfaces_avc;
635
636     downscaled_width_4x = generic_state->frame_width_4x;
637     downscaled_height_4x = generic_state->frame_height_4x;
638
639     i965_CreateSurfaces(ctx,
640                         downscaled_width_4x,
641                         downscaled_height_4x,
642                         VA_RT_FORMAT_YUV420,
643                         1,
644                         &avc_surface->scaled_4x_surface_id);
645
646     avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
647
648     if (!avc_surface->scaled_4x_surface_obj) {
649         return VA_STATUS_ERROR_ALLOCATION_FAILED;
650     }
651
652     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
653                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
654
655     downscaled_width_16x = generic_state->frame_width_16x;
656     downscaled_height_16x = generic_state->frame_height_16x;
657     i965_CreateSurfaces(ctx,
658                         downscaled_width_16x,
659                         downscaled_height_16x,
660                         VA_RT_FORMAT_YUV420,
661                         1,
662                         &avc_surface->scaled_16x_surface_id);
663     avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
664
665     if (!avc_surface->scaled_16x_surface_obj) {
666         return VA_STATUS_ERROR_ALLOCATION_FAILED;
667     }
668
669     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
670                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
671
672     if(generic_state->b32xme_supported ||
673        generic_state->b32xme_enabled)
674     {
675         downscaled_width_32x = generic_state->frame_width_32x;
676         downscaled_height_32x = generic_state->frame_height_32x;
677         i965_CreateSurfaces(ctx,
678                             downscaled_width_32x,
679                             downscaled_height_32x,
680                             VA_RT_FORMAT_YUV420,
681                             1,
682                             &avc_surface->scaled_32x_surface_id);
683         avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
684
685         if (!avc_surface->scaled_32x_surface_obj) {
686             return VA_STATUS_ERROR_ALLOCATION_FAILED;
687         }
688
689         i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
690                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
691     }
692
693     /*mb code and mv data for each frame*/
694     size = frame_mb_nums * 16 * 4;
695     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
696         &avc_surface->res_mb_code_surface,
697         ALIGN(size,0x1000),
698         "mb code buffer");
699     if (!allocate_flag)
700         goto failed_allocation;
701
702     size = frame_mb_nums * 32 * 4;
703     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
704         &avc_surface->res_mv_data_surface,
705         ALIGN(size,0x1000),
706         "mv data buffer");
707     if (!allocate_flag)
708         goto failed_allocation;
709
710     /* ref pic list*/
711     if(avc_state->ref_pic_select_list_supported)
712     {
713         width = ALIGN(frame_width_in_mbs * 8,64);
714         height= frame_height_in_mbs ;
715         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
716                                      &avc_surface->res_ref_pic_select_surface,
717                                      width, height,
718                                      width,
719                                      "Ref pic select list buffer");
720         if (!allocate_flag)
721             goto failed_allocation;
722     }
723
724     /*direct mv*/
725     avc_surface->dmv_top =
726         dri_bo_alloc(i965->intel.bufmgr,
727         "direct mv top Buffer",
728         68 * frame_mb_nums,
729         64);
730     avc_surface->dmv_bottom =
731         dri_bo_alloc(i965->intel.bufmgr,
732         "direct mv bottom Buffer",
733         68 * frame_mb_nums,
734         64);
735     assert(avc_surface->dmv_top);
736     assert(avc_surface->dmv_bottom);
737
738     return VA_STATUS_SUCCESS;
739
740 failed_allocation:
741     return VA_STATUS_ERROR_ALLOCATION_FAILED;
742 }
743
744 static VAStatus
745 gen9_avc_allocate_resources(VADriverContextP ctx,
746                             struct encode_state *encode_state,
747                             struct intel_encoder_context *encoder_context)
748 {
749     struct i965_driver_data *i965 = i965_driver_data(ctx);
750     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
751     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
752     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
753     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
754     unsigned int size  = 0;
755     unsigned int width  = 0;
756     unsigned int height  = 0;
757     unsigned char * data  = NULL;
758     int allocate_flag = 1;
759     int i = 0;
760
761     /*all the surface/buffer are allocated here*/
762
763     /*second level batch buffer for image state write when cqp etc*/
764     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
765     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
766     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
767                              &avc_ctx->res_image_state_batch_buffer_2nd_level,
768                              ALIGN(size,0x1000),
769                              "second levle batch (image state write) buffer");
770     if (!allocate_flag)
771         goto failed_allocation;
772
773     /* scaling related surface   */
774     if(avc_state->mb_status_supported)
775     {
776         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
777         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023)&~0x3ff;
778         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
779                                  &avc_ctx->res_mb_status_buffer,
780                                  ALIGN(size,0x1000),
781                                  "MB statistics output buffer");
782         if (!allocate_flag)
783             goto failed_allocation;
784         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
785     }
786
787     if(avc_state->flatness_check_supported)
788     {
789         width = generic_state->frame_width_in_mbs * 4;
790         height= generic_state->frame_height_in_mbs * 4;
791         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
792         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
793                                      &avc_ctx->res_flatness_check_surface,
794                                      width, height,
795                                      ALIGN(width,64),
796                                      "Flatness check buffer");
797         if (!allocate_flag)
798             goto failed_allocation;
799     }
800     /* me related surface */
801     width = generic_state->downscaled_width_4x_in_mb * 8;
802     height= generic_state->downscaled_height_4x_in_mb * 4 * 10;
803     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
804     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
805                                  &avc_ctx->s4x_memv_distortion_buffer,
806                                  width, height,
807                                  ALIGN(width,64),
808                                  "4x MEMV distortion buffer");
809     if (!allocate_flag)
810         goto failed_allocation;
811     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
812
813     width = (generic_state->downscaled_width_4x_in_mb + 7)/8 * 64;
814     height= (generic_state->downscaled_height_4x_in_mb + 1)/2 * 8;
815     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
816     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
817                                  &avc_ctx->s4x_memv_min_distortion_brc_buffer,
818                                  width, height,
819                                  width,
820                                  "4x MEMV min distortion brc buffer");
821     if (!allocate_flag)
822         goto failed_allocation;
823     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
824
825
826     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32,64);
827     height= generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
828     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
829     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
830                                  &avc_ctx->s4x_memv_data_buffer,
831                                  width, height,
832                                  width,
833                                  "4x MEMV data buffer");
834     if (!allocate_flag)
835         goto failed_allocation;
836     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
837
838
839     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32,64);
840     height= generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
841     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
842     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
843                                  &avc_ctx->s16x_memv_data_buffer,
844                                  width, height,
845                                  width,
846                                  "16x MEMV data buffer");
847     if (!allocate_flag)
848         goto failed_allocation;
849     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
850
851
852     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32,64);
853     height= generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
854     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
855     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
856                                  &avc_ctx->s32x_memv_data_buffer,
857                                  width, height,
858                                  width,
859                                  "32x MEMV data buffer");
860     if (!allocate_flag)
861         goto failed_allocation;
862     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
863
864
865     if(!generic_state->brc_allocated)
866     {
867         /*brc related surface */
868         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
869         size = 864;
870         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
871                                  &avc_ctx->res_brc_history_buffer,
872                                  ALIGN(size,0x1000),
873                                  "brc history buffer");
874         if (!allocate_flag)
875             goto failed_allocation;
876
877         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
878         size = 64;//44
879         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
880                                  &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
881                                  ALIGN(size,0x1000),
882                                  "brc pak statistic buffer");
883         if (!allocate_flag)
884             goto failed_allocation;
885
886         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
887         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
888         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
889                                  &avc_ctx->res_brc_image_state_read_buffer,
890                                  ALIGN(size,0x1000),
891                                  "brc image state read buffer");
892         if (!allocate_flag)
893             goto failed_allocation;
894
895         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
896         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
897         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
898                                  &avc_ctx->res_brc_image_state_write_buffer,
899                                  ALIGN(size,0x1000),
900                                  "brc image state write buffer");
901         if (!allocate_flag)
902             goto failed_allocation;
903
904         width = ALIGN(avc_state->brc_const_data_surface_width,64);
905         height= avc_state->brc_const_data_surface_height;
906         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
907         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
908                                      &avc_ctx->res_brc_const_data_buffer,
909                                      width, height,
910                                      width,
911                                      "brc const data buffer");
912         if (!allocate_flag)
913             goto failed_allocation;
914
915         if(generic_state->brc_distortion_buffer_supported)
916         {
917             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8,64);
918             height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
919             width = (generic_state->downscaled_width_4x_in_mb + 7)/8 * 64;
920             height= (generic_state->downscaled_height_4x_in_mb + 1)/2 * 8;
921             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
922             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
923                                          &avc_ctx->res_brc_dist_data_surface,
924                                          width, height,
925                                          width,
926                                          "brc dist data buffer");
927             if (!allocate_flag)
928                 goto failed_allocation;
929             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
930         }
931
932         if(generic_state->brc_roi_enable)
933         {
934             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16,64);
935             height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
936             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
937             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
938                                          &avc_ctx->res_mbbrc_roi_surface,
939                                          width, height,
940                                          width,
941                                          "mbbrc roi buffer");
942             if (!allocate_flag)
943                 goto failed_allocation;
944             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
945         }
946
947         /*mb qp in mb brc*/
948         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4,64);
949         height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
950         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
951         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
952                                      &avc_ctx->res_mbbrc_mb_qp_data_surface,
953                                      width, height,
954                                      width,
955                                      "mbbrc mb qp buffer");
956         if (!allocate_flag)
957             goto failed_allocation;
958
959         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
960         size = 16 * AVC_QP_MAX * 4;
961         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
962                                  &avc_ctx->res_mbbrc_const_data_buffer,
963                                  ALIGN(size,0x1000),
964                                  "mbbrc const data buffer");
965         if (!allocate_flag)
966             goto failed_allocation;
967
968         if(avc_state->decouple_mbenc_curbe_from_brc_enable)
969         {
970             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
971             size = avc_state->mbenc_brc_buffer_size;
972             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
973                                      &avc_ctx->res_mbenc_brc_buffer,
974                                      ALIGN(size,0x1000),
975                                      "mbenc brc buffer");
976             if (!allocate_flag)
977                 goto failed_allocation;
978             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
979         }
980         generic_state->brc_allocated = 1;
981     }
982
983     /*mb qp external*/
984     if(avc_state->mb_qp_data_enable)
985     {
986         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4,64);
987         height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
988         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
989         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
990                                      &avc_ctx->res_mb_qp_data_surface,
991                                      width, height,
992                                      width,
993                                      "external mb qp buffer");
994         if (!allocate_flag)
995             goto failed_allocation;
996     }
997
998     /*     mbenc related surface. it share most of surface with other kernels     */
999     if(avc_state->arbitrary_num_mbs_in_slice)
1000     {
1001         width = (generic_state->frame_width_in_mbs + 1) * 64;
1002         height= generic_state->frame_height_in_mbs ;
1003         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1004         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1005                                      &avc_ctx->res_mbenc_slice_map_surface,
1006                                      width, height,
1007                                      width,
1008                                      "slice map buffer");
1009         if (!allocate_flag)
1010             goto failed_allocation;
1011
1012         /*generate slice map,default one slice per frame.*/
1013     }
1014
1015     /* sfd related surface  */
1016     if(avc_state->sfd_enable)
1017     {
1018         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1019         size = 128;
1020         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1021                                  &avc_ctx->res_sfd_output_buffer,
1022                                  size,
1023                                  "sfd output buffer");
1024         if (!allocate_flag)
1025             goto failed_allocation;
1026
1027         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1028         size = ALIGN(52,64);
1029         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1030                                  &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1031                                  size,
1032                                  "sfd P frame cost table buffer");
1033         if (!allocate_flag)
1034             goto failed_allocation;
1035         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1036         assert(data);
1037         memcpy(data,gen9_avc_sfd_cost_table_p_frame,sizeof(unsigned char) *52);
1038         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1039
1040         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1041         size = ALIGN(52,64);
1042         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1043                                  &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1044                                  size,
1045                                  "sfd B frame cost table buffer");
1046         if (!allocate_flag)
1047             goto failed_allocation;
1048         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1049         assert(data);
1050         memcpy(data,gen9_avc_sfd_cost_table_b_frame,sizeof(unsigned char) *52);
1051         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1052     }
1053
1054     /* wp related surfaces */
1055     if(avc_state->weighted_prediction_supported)
1056     {
1057         for(i = 0; i < 2 ; i++)
1058         {
1059             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1060                 continue;
1061             }
1062
1063             width = generic_state->frame_width_in_pixel;
1064             height= generic_state->frame_height_in_pixel ;
1065             i965_CreateSurfaces(ctx,
1066                                 width,
1067                                 height,
1068                                 VA_RT_FORMAT_YUV420,
1069                                 1,
1070                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1071             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1072
1073             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1074                 goto failed_allocation;
1075             }
1076
1077             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1078                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1079         }
1080         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1081         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0],GPE_RESOURCE_ALIGNMENT);
1082         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1083         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1],GPE_RESOURCE_ALIGNMENT);
1084     }
1085
1086     /* other   */
1087
1088     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1089     size = 4 * 1;
1090     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1091                                  &avc_ctx->res_mad_data_buffer,
1092                                  ALIGN(size,0x1000),
1093                                  "MAD data buffer");
1094     if (!allocate_flag)
1095         goto failed_allocation;
1096
1097     return VA_STATUS_SUCCESS;
1098
1099 failed_allocation:
1100     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1101 }
1102
1103 static void
1104 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1105 {
1106     if(!vme_context)
1107         return;
1108
1109     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1110     VADriverContextP ctx = avc_ctx->ctx;
1111     int i = 0;
1112
1113     /* free all the surface/buffer here*/
1114     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1115     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1116     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1117     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1118     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1119     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1120     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1121     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1122     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1123     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1124     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1125     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1126     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1127     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1128     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1129     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1130     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1131     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1132     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1133     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1134     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1135     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1136     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1137     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1138     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1139     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1140
1141     for(i = 0;i < 2 ; i++)
1142     {
1143         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1144             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1145             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1146             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1147         }
1148     }
1149
1150 }
1151
1152 static void
1153 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1154                              struct intel_encoder_context *encoder_context,
1155                              struct i965_gpe_context *gpe_context,
1156                              int media_function,
1157                              struct gpe_media_object_parameter *param)
1158 {
1159     struct i965_driver_data *i965 = i965_driver_data(ctx);
1160     struct i965_gpe_table *gpe = &i965->gpe_table;
1161     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1162     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1163
1164     struct intel_batchbuffer *batch = encoder_context->base.batch;
1165     struct encoder_status_buffer_internal *status_buffer;
1166     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1167
1168     if (!batch)
1169         return;
1170
1171     intel_batchbuffer_start_atomic(batch, 0x1000);
1172     intel_batchbuffer_emit_mi_flush(batch);
1173
1174     status_buffer = &(avc_ctx->status_buffer);
1175     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1176     mi_store_data_imm.bo = status_buffer->bo;
1177     mi_store_data_imm.offset = status_buffer->media_index_offset;
1178     mi_store_data_imm.dw0 = media_function;
1179     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1180
1181     gpe->pipeline_setup(ctx, gpe_context, batch);
1182     gpe->media_object(ctx, gpe_context, batch, param);
1183     gpe->media_state_flush(ctx, gpe_context, batch);
1184
1185     gpe->pipeline_end(ctx, gpe_context, batch);
1186
1187     intel_batchbuffer_end_atomic(batch);
1188
1189     intel_batchbuffer_flush(batch);
1190 }
1191
1192 static void
1193 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1194                                     struct intel_encoder_context *encoder_context,
1195                                     struct i965_gpe_context *gpe_context,
1196                                     int media_function,
1197                                     struct gpe_media_object_walker_parameter *param)
1198 {
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct i965_gpe_table *gpe = &i965->gpe_table;
1201     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1202     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1203
1204     struct intel_batchbuffer *batch = encoder_context->base.batch;
1205     struct encoder_status_buffer_internal *status_buffer;
1206     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1207
1208     if (!batch)
1209         return;
1210
1211     intel_batchbuffer_start_atomic(batch, 0x1000);
1212
1213     intel_batchbuffer_emit_mi_flush(batch);
1214
1215     status_buffer = &(avc_ctx->status_buffer);
1216     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1217     mi_store_data_imm.bo = status_buffer->bo;
1218     mi_store_data_imm.offset = status_buffer->media_index_offset;
1219     mi_store_data_imm.dw0 = media_function;
1220     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1221
1222     gpe->pipeline_setup(ctx, gpe_context, batch);
1223     gpe->media_object_walker(ctx, gpe_context, batch, param);
1224     gpe->media_state_flush(ctx, gpe_context, batch);
1225
1226     gpe->pipeline_end(ctx, gpe_context, batch);
1227
1228     intel_batchbuffer_end_atomic(batch);
1229
1230     intel_batchbuffer_flush(batch);
1231 }
1232
1233 static void
1234 gen9_init_gpe_context_avc(VADriverContextP ctx,
1235                           struct i965_gpe_context *gpe_context,
1236                           struct encoder_kernel_parameter *kernel_param)
1237 {
1238     struct i965_driver_data *i965 = i965_driver_data(ctx);
1239
1240     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1241
1242     gpe_context->sampler.entry_size = 0;
1243     gpe_context->sampler.max_entries = 0;
1244
1245     if (kernel_param->sampler_size) {
1246         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1247         gpe_context->sampler.max_entries = 1;
1248     }
1249
1250     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1251     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1252
1253     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1254     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1255     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1256     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1257
1258     if (i965->intel.eu_total > 0)
1259         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1260     else
1261         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1262
1263     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1264     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1265     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1266                                               gpe_context->vfe_state.curbe_allocation_size -
1267                                               ((gpe_context->idrt.entry_size >> 5) *
1268                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1269     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1270     gpe_context->vfe_state.gpgpu_mode = 0;
1271 }
1272
1273 static void
1274 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1275                              struct encoder_scoreboard_parameter *scoreboard_param)
1276 {
1277     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1278     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1279     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1280
1281     if (scoreboard_param->walkpat_flag) {
1282         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1283         gpe_context->vfe_desc5.scoreboard0.type = 1;
1284
1285         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1286         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1287
1288         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1289         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1290
1291         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1292         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1293
1294         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1295         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1296     } else {
1297         // Scoreboard 0
1298         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1299         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1300
1301         // Scoreboard 1
1302         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1303         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1304
1305         // Scoreboard 2
1306         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1307         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1308
1309         // Scoreboard 3
1310         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1311         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1312
1313         // Scoreboard 4
1314         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1315         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1316
1317         // Scoreboard 5
1318         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1319         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1320
1321         // Scoreboard 6
1322         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1323         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1324
1325         // Scoreboard 7
1326         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1327         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1328     }
1329 }
/*
 * VME pipeline related functions
 */
1333
/*
 * Scaling kernel related functions
 */
1337 static void
1338 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1339                            struct encode_state *encode_state,
1340                            struct i965_gpe_context *gpe_context,
1341                            struct intel_encoder_context *encoder_context,
1342                            void *param)
1343 {
1344     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1345     struct scaling_param *surface_param = (struct scaling_param *)param;
1346
1347     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1348
1349     if (!curbe_cmd)
1350         return;
1351
1352     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1353
1354     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1355     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1356
1357     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1358     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1359
1360
1361     curbe_cmd->dw5.flatness_threshold = 128;
1362     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1363     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1364     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1365
1366     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1367         curbe_cmd->dw7.enable_mb_variance_output ||
1368         curbe_cmd->dw8.enable_mb_pixel_average_output)
1369     {
1370         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1371     }
1372
1373     i965_gpe_context_unmap_curbe(gpe_context);
1374     return;
1375 }
1376
1377 static void
1378 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1379                            struct encode_state *encode_state,
1380                            struct i965_gpe_context *gpe_context,
1381                            struct intel_encoder_context *encoder_context,
1382                            void *param)
1383 {
1384     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1385     struct scaling_param *surface_param = (struct scaling_param *)param;
1386
1387     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1388
1389     if (!curbe_cmd)
1390         return;
1391
1392     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1393
1394     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1395     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1396
1397     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1398     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1399
1400     if(surface_param->enable_mb_flatness_check)
1401         curbe_cmd->dw5.flatness_threshold = 128;
1402     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1403     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1404     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1405     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1406
1407     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1408         curbe_cmd->dw6.enable_mb_variance_output ||
1409         curbe_cmd->dw6.enable_mb_pixel_average_output)
1410     {
1411         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1412     }
1413
1414     i965_gpe_context_unmap_curbe(gpe_context);
1415     return;
1416 }
1417
1418 static void
1419 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1420                            struct encode_state *encode_state,
1421                            struct i965_gpe_context *gpe_context,
1422                            struct intel_encoder_context *encoder_context,
1423                            void *param)
1424 {
1425     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1426     struct scaling_param *surface_param = (struct scaling_param *)param;
1427
1428     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1429
1430     if (!curbe_cmd)
1431         return;
1432
1433     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1434
1435     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1436     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1437
1438     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1439     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1440
1441     i965_gpe_context_unmap_curbe(gpe_context);
1442     return;
1443 }
1444
1445 static void
1446 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1447                               struct encode_state *encode_state,
1448                               struct i965_gpe_context *gpe_context,
1449                               struct intel_encoder_context *encoder_context,
1450                               void *param)
1451 {
1452     struct scaling_param *surface_param = (struct scaling_param *)param;
1453     unsigned int surface_format;
1454     unsigned int res_size;
1455
1456     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1457         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1458     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1459         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1460     else
1461         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1462
1463     gen9_add_2d_gpe_surface(ctx, gpe_context,
1464                             surface_param->input_surface,
1465                             0, 1, surface_format,
1466                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1467
1468     gen9_add_2d_gpe_surface(ctx, gpe_context,
1469                             surface_param->output_surface,
1470                             0, 1, surface_format,
1471                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1472
1473     /*add buffer mv_proc_stat, here need change*/
1474     if (surface_param->mbv_proc_stat_enabled)
1475     {
1476         res_size = 16 * (surface_param->input_frame_width/16) * (surface_param->input_frame_height/16) * sizeof(unsigned int);
1477
1478         gen9_add_buffer_gpe_surface(ctx,
1479                                     gpe_context,
1480                                     surface_param->pres_mbv_proc_stat_buffer,
1481                                     0,
1482                                     res_size/4,
1483                                     0,
1484                                     GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1485     }else if(surface_param->enable_mb_flatness_check)
1486     {
1487         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1488                                        surface_param->pres_flatness_check_surface,
1489                                        1,
1490                                        I965_SURFACEFORMAT_R8_UNORM,
1491                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1492     }
1493
1494     return;
1495 }
1496
1497 static VAStatus
1498 gen9_avc_kernel_scaling(VADriverContextP ctx,
1499                         struct encode_state *encode_state,
1500                         struct intel_encoder_context *encoder_context,
1501                         int hme_type)
1502 {
1503     struct i965_driver_data *i965 = i965_driver_data(ctx);
1504     struct i965_gpe_table *gpe = &i965->gpe_table;
1505     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1506     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1507     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
1508     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
1509     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
1510
1511     struct i965_gpe_context *gpe_context;
1512     struct scaling_param surface_param;
1513     struct object_surface *obj_surface;
1514     struct gen9_surface_avc *avc_priv_surface;
1515     struct gpe_media_object_walker_parameter media_object_walker_param;
1516     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1517     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1518     int media_function = 0;
1519     int kernel_idx = 0;
1520
1521     obj_surface = encode_state->reconstructed_object;
1522     avc_priv_surface = obj_surface->private_data;
1523
1524     memset(&surface_param,0,sizeof(struct scaling_param));
1525     switch(hme_type)
1526     {
1527     case INTEL_ENC_HME_4x :
1528         {
1529             media_function = INTEL_MEDIA_STATE_4X_SCALING;
1530             kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1531             downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1532             downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1533
1534             surface_param.input_surface = encode_state->input_yuv_object ;
1535             surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1536             surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1537
1538             surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1539             surface_param.output_frame_width = generic_state->frame_width_4x ;
1540             surface_param.output_frame_height = generic_state->frame_height_4x ;
1541
1542             surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1543             surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1544             surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1545
1546             surface_param.blk8x8_stat_enabled = 0 ;
1547             surface_param.use_4x_scaling  = 1 ;
1548             surface_param.use_16x_scaling = 0 ;
1549             surface_param.use_32x_scaling = 0 ;
1550             break;
1551         }
1552     case INTEL_ENC_HME_16x :
1553         {
1554             media_function = INTEL_MEDIA_STATE_16X_SCALING;
1555             kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1556             downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1557             downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1558
1559             surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1560             surface_param.input_frame_width = generic_state->frame_width_4x ;
1561             surface_param.input_frame_height = generic_state->frame_height_4x ;
1562
1563             surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1564             surface_param.output_frame_width = generic_state->frame_width_16x ;
1565             surface_param.output_frame_height = generic_state->frame_height_16x ;
1566
1567             surface_param.enable_mb_flatness_check = 0 ;
1568             surface_param.enable_mb_variance_output = 0 ;
1569             surface_param.enable_mb_pixel_average_output = 0 ;
1570
1571             surface_param.blk8x8_stat_enabled = 0 ;
1572             surface_param.use_4x_scaling  = 0 ;
1573             surface_param.use_16x_scaling = 1 ;
1574             surface_param.use_32x_scaling = 0 ;
1575
1576             break;
1577         }
1578     case INTEL_ENC_HME_32x :
1579         {
1580             media_function = INTEL_MEDIA_STATE_32X_SCALING;
1581             kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1582             downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1583             downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1584
1585             surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1586             surface_param.input_frame_width = generic_state->frame_width_16x ;
1587             surface_param.input_frame_height = generic_state->frame_height_16x ;
1588
1589             surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1590             surface_param.output_frame_width = generic_state->frame_width_32x ;
1591             surface_param.output_frame_height = generic_state->frame_height_32x ;
1592
1593             surface_param.enable_mb_flatness_check = 0 ;
1594             surface_param.enable_mb_variance_output = 0 ;
1595             surface_param.enable_mb_pixel_average_output = 0 ;
1596
1597             surface_param.blk8x8_stat_enabled = 0 ;
1598             surface_param.use_4x_scaling  = 0 ;
1599             surface_param.use_16x_scaling = 0 ;
1600             surface_param.use_32x_scaling = 1 ;
1601             break;
1602         }
1603     default :
1604         assert(0);
1605
1606     }
1607
1608     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1609
1610     gpe->context_init(ctx, gpe_context);
1611     gpe->reset_binding_table(ctx, gpe_context);
1612
1613     if(surface_param.use_32x_scaling)
1614     {
1615         generic_ctx->pfn_set_curbe_scaling2x(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1616     }else
1617     {
1618         generic_ctx->pfn_set_curbe_scaling4x(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1619     }
1620
1621     if(surface_param.use_32x_scaling)
1622     {
1623         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1624         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1625     }else
1626     {
1627         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1628         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1629     }
1630
1631     if(surface_param.use_4x_scaling)
1632     {
1633         if(avc_state->mb_status_supported)
1634         {
1635             surface_param.enable_mb_flatness_check = 0;
1636             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling)?(avc_state->mb_status_enable || avc_state->flatness_check_enable):0 ;
1637             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1638
1639         }else
1640         {
1641             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling)?avc_state->flatness_check_enable:0;
1642             surface_param.mbv_proc_stat_enabled = 0 ;
1643             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1644         }
1645     }
1646
1647     generic_ctx->pfn_send_scaling_surface(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1648
1649     /* setup the interface data */
1650     gpe->setup_interface_data(ctx, gpe_context);
1651
1652     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1653     if(surface_param.use_32x_scaling)
1654     {
1655         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1656         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1657     }else
1658     {
1659         /* the scaling is based on 8x8 blk level */
1660         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1661         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1662     }
1663     kernel_walker_param.no_dependency = 1;
1664
1665     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1666
1667     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1668                                         gpe_context,
1669                                         media_function,
1670                                         &media_object_walker_param);
1671
1672     return VA_STATUS_SUCCESS;
1673 }
1674
1675 /*
1676 frame/mb brc related function
1677 */
/*
 * Fill an MFX_AVC_IMG_STATE command structure from the current sequence and
 * picture parameters.  Pass-dependent fields (dw4.macroblock_stat_enable,
 * dw5.non_first_pass_flag, dw5.mb_rate_ctrl_flag) are initialized for the
 * first PAK pass; callers (gen9_avc_set_image_state*) patch them per pass.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; dword_length excludes the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 -2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: picture structure / coding tool flags taken from SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: size-check flags used by the multi-pass PAK conformance logic. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) is only enabled together with CABAC. */
    if(pstate->dw4.entropy_coding_flag && (avc_state->tq_enable))
    {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    }else
    {
        pstate->dw5.aq_rounding = 0;
    }

    /* Per-MB bit-size limits used by the intra/inter max-bit checks above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max is the field's full 14-bit range. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set parameters DW19/DW20 for slices */
}
1770
1771 void gen9_avc_set_image_state(VADriverContextP ctx,
1772                               struct encode_state *encode_state,
1773                               struct intel_encoder_context *encoder_context,
1774                               struct i965_gpe_resource *gpe_resource)
1775 {
1776     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1777     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
1778     char *pdata;
1779     int i;
1780     unsigned int * data;
1781     struct gen9_mfx_avc_img_state cmd;
1782
1783     pdata = i965_map_gpe_resource(gpe_resource);
1784
1785     if (!pdata)
1786         return;
1787
1788     gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd);
1789     for(i = 0; i < generic_state->num_pak_passes;i++)
1790     {
1791
1792         if(i == 0)
1793         {
1794             cmd.dw4.macroblock_stat_enable = 0;
1795             cmd.dw5.non_first_pass_flag = 0;
1796         }else
1797         {
1798             cmd.dw4.macroblock_stat_enable = 1;
1799             cmd.dw5.non_first_pass_flag = 1;
1800             cmd.dw5.intra_mb_ipcm_flag = 1;
1801
1802         }
1803          cmd.dw5.mb_rate_ctrl_flag = 0;
1804          memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state));
1805          data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1806         *data = MI_BATCH_BUFFER_END;
1807
1808          pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1809     }
1810     i965_unmap_gpe_resource(gpe_resource);
1811     return;
1812 }
1813
1814 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1815                                       struct encode_state *encode_state,
1816                                       struct intel_encoder_context *encoder_context,
1817                                       struct i965_gpe_resource *gpe_resource)
1818 {
1819     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1820     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
1821     char *pdata;
1822
1823     unsigned int * data;
1824     struct gen9_mfx_avc_img_state cmd;
1825
1826     pdata = i965_map_gpe_resource(gpe_resource);
1827
1828     if (!pdata)
1829         return;
1830
1831     gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd);
1832
1833     if(generic_state->curr_pak_pass == 0)
1834     {
1835         cmd.dw4.macroblock_stat_enable = 0;
1836         cmd.dw5.non_first_pass_flag = 0;
1837
1838     }
1839     else
1840     {
1841         cmd.dw4.macroblock_stat_enable = 1;
1842         cmd.dw5.non_first_pass_flag = 0;
1843         cmd.dw5.intra_mb_ipcm_flag = 1;
1844     }
1845
1846     cmd.dw5.mb_rate_ctrl_flag = 0;
1847     memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state));
1848     data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1849     *data = MI_BATCH_BUFFER_END;
1850
1851     i965_unmap_gpe_resource(gpe_resource);
1852     return;
1853 }
1854
/*
 * Build the per-QP trellis-quantization lambda table for KBL/GLK (gen9.5).
 *
 * The per-slice-type source tables pack two 16-bit values per entry:
 * intra lambda in the high half-word, inter lambda in the low half-word.
 * Entries holding the sentinel values 0xfffa (intra) / 0xffef (inter) are
 * rewritten to 0xf000 + a rounding value chosen from the preset tables or
 * the application-supplied overrides in avc_state.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value,inter,intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* Two dwords (columns) per QP; size is in bytes. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table,(unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table,(unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table,(unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0],size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    for(i = 0; i < AVC_QP_MAX ; i++)
    {
        for(col = 0; col < 2; col++)
        {
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;  /* intra lambda in the upper half-word */

            if(intra < GEN95_AVC_MAX_LAMBDA)
            {
                if(intra == 0xfffa)
                {
                    /* Sentinel: substitute the default intra TQ rounding. */
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;  /* inter lambda in the lower half-word */

            if(inter < GEN95_AVC_MAX_LAMBDA)
            {
                if(inter == 0xffef)
                {
                    /* Pick the inter rounding: user override if set,
                     * otherwise the preset default for this slice type. */
                    if(generic_state->frame_type == SLICE_TYPE_P)
                    {
                        if(avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    }else if(generic_state->frame_type == SLICE_TYPE_B)
                    {
                        if(pic_param->pic_fields.bits.reference_pic_flag)
                        {
                            if(avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        }
                        else
                        {
                            if(avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): this rewrite runs for EVERY entry below
                 * GEN95_AVC_MAX_LAMBDA, not only for the 0xffef sentinel,
                 * and reuses rounding_value (0, or a value from a previous
                 * iteration) when the sentinel did not match — confirm this
                 * matches the intended table layout. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i *2 + col) = intra + inter;
        }
    }
}
1944
/*
 * Populate the BRC constant-data buffer (res_brc_const_data_buffer) for the
 * current frame type.  The buffer is a concatenation of fixed-size regions,
 * in order: QP-adjustment/distortion tables, skip-threshold table, reference
 * QP list, MV/mode cost table (32 bytes per QP), ref cost table, intra
 * scaling factors, and — on KBL/GLK only — lambda data and FTQ25 tables.
 * The `data` pointer walks the buffer region by region via `data += size`.
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data =NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B column of the cost tables for this frame type. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data,gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Application-supplied non-FTQ skip thresholds override the odd byte of
     * each 2-byte per-QP entry in the skip-threshold table. */
    if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0 refs, 32 bytes (unused), 32 bytes L1 refs, 160
     * bytes padding.  L0 and L1 sub-regions are pre-filled with 0xff. */
    size = 32 + 32 +32 +160;
    memset(data,0xff,32);
    memset(data+32+32,0xff,32);
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        {
            for(i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList0[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[0][i];//?
            }
        }
        break;
    case SLICE_TYPE_B:
        {
            /* L1 refs live at offset 64 within this region. */
            data = data + 32 + 32;
            for(i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList1[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[1][i];//?
            }

            data = data - 32 - 32;

            for(i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList0[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[0][i];//?
            }
        }
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data,(unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char));

    if(avc_state->old_mode_cost_enable)
    {   data_tmp = data;
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            /* NOTE(review): data_tmp is unsigned char*, so this store
             * truncates the cost to one byte and steps 16 bytes per QP —
             * verify against the 32-byte-per-QP mode cost layout above. */
            *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Application-supplied FTQ skip thresholds are replicated into bytes
     * 24-25 and 27-31 of each 32-byte per-QP cost entry. */
    if(avc_state->ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            *(data + (i * 32) + 24) =
            *(data + (i * 32) + 25) =
            *(data + (i * 32) + 27) =
            *(data + (i * 32) + 28) =
            *(data + (i * 32) + 29) =
            *(data + (i * 32) + 30) =
            *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if(avc_state->adaptive_intra_scaling_enable)
    {
        memcpy(data,(unsigned char *)gen9_avc_adaptive_intra_scaling_factor,size * sizeof(unsigned char));
    }else
    {
        memcpy(data,(unsigned char *)gen9_avc_intra_scaling_factor,size * sizeof(unsigned char));
    }

    /* Gen9.5 (KBL/GLK) appends lambda data and the FTQ25 table. */
    if (IS_KBL(i965->intel.device_info)||
        IS_GLK(i965->intel.device_info))
    {
        data += size;

        size = 512;
        memcpy(data,(unsigned char *)gen95_avc_lambda_data,size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data,(unsigned char *)gen95_avc_ftq25,size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2116
/*
 * Legacy (pre-gen9 table layout) variant of gen9_avc_init_brc_const_data:
 * fills the BRC constant-data buffer from the gen75 tables.  The reference
 * QP list region is left zeroed (only skipped over) and there is no
 * scaling-factor / gen9.5 tail.
 *
 * NOTE(review): `data` is unsigned int* here (the sibling function uses
 * unsigned char*), so `data += size` advances size*4 bytes and the
 * byte-offset expressions (`data + 1 + i*2`, `data + i*32 + 24`) address
 * dword, not byte, offsets — verify this matches the intended buffer
 * layout and that the buffer is large enough.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data =NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B column of the cost tables for this frame type. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data,gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i< AVC_QP_MAX ; i++)
        {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Region is only skipped; the zeroed buffer contents are left as-is. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data,(unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char));

    if(avc_state->old_mode_cost_enable)
    {   data_tmp = data;
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            /* Overwrite DW3 of each per-QP entry with the legacy intra
             * mode cost; steps 16 dwords (64 bytes) per QP. */
            *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Application-supplied FTQ skip thresholds replicated into slots
     * 24-25 and 27-31 of each per-QP entry. */
    if(avc_state->ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            *(data + (i * 32) + 24) =
            *(data + (i * 32) + 25) =
            *(data + (i * 32) + 27) =
            *(data + (i * 32) + 28) =
            *(data + (i * 32) + 29) =
            *(data + (i * 32) + 30) =
            *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2217 static void
2218 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2219                                   struct encode_state *encode_state,
2220                                   struct i965_gpe_context *gpe_context,
2221                                   struct intel_encoder_context *encoder_context,
2222                                   void * param)
2223 {
2224     gen9_avc_brc_init_reset_curbe_data *cmd;
2225     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2226     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2227     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
2228     double input_bits_per_frame = 0;
2229     double bps_ratio = 0;
2230     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2231     struct avc_param common_param;
2232
2233     cmd = i965_gpe_context_map_curbe(gpe_context);
2234
2235     if (!cmd)
2236         return;
2237
2238     memcpy(cmd,&gen9_avc_brc_init_reset_curbe_init_data,sizeof(gen9_avc_brc_init_reset_curbe_data));
2239
2240     memset(&common_param,0,sizeof(common_param));
2241     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2242     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2243     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2244     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2245     common_param.frames_per_100s = generic_state->frames_per_100s;
2246     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2247     common_param.target_bit_rate = generic_state->target_bit_rate;
2248
2249     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param,seq_param->level_idc);
2250     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2251     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2252     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2253     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2254     cmd->dw8.gop_p = (generic_state->gop_ref_distance)?((generic_state->gop_size -1)/generic_state->gop_ref_distance):0;
2255     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2256     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2257     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2258     cmd->dw12.no_slices = avc_state->slice_num;
2259
2260     //VUI
2261     if(seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR )
2262     {
2263         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2264         if(generic_state->internal_rate_mode == VA_RC_CBR)
2265         {
2266             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2267
2268         }
2269
2270     }
2271     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2272     cmd->dw7.frame_rate_d = 100;
2273     cmd->dw8.brc_flag = 0;
2274     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled)? 0 : 0x8000;
2275
2276
2277     if(generic_state->internal_rate_mode == VA_RC_CBR)
2278     { //CBR
2279         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2280         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISCBR;
2281
2282     }else if(generic_state->internal_rate_mode == VA_RC_VBR)
2283     {//VBR
2284         if(cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate)
2285         {
2286             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2287         }
2288         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISVBR;
2289
2290     }else if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
2291     { //AVBR
2292         cmd->dw4.max_bit_rate =cmd->dw3.average_bit_rate;
2293         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISAVBR;
2294
2295     }
2296     //igonre icq/vcm/qvbr
2297
2298     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2299     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2300
2301     //frame bits
2302     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d)/(double)(cmd->dw6.frame_rate_m);;
2303
2304     if(cmd->dw2.buf_size_in_bits == 0)
2305     {
2306        cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2307     }
2308
2309     if(cmd->dw1.init_buf_full_in_bits == 0)
2310     {
2311        cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7/8;
2312     }
2313     if(cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2))
2314     {
2315        cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2316     }
2317     if(cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits)
2318     {
2319        cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2320     }
2321
2322     //AVBR
2323     if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
2324     {
2325        cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2326        cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits/4);
2327
2328     }
2329
2330     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits/30.0);
2331     bps_ratio = (bps_ratio < 0.1)? 0.1:(bps_ratio > 3.5)?3.5:bps_ratio;
2332
2333
2334     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90,bps_ratio));
2335     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66,bps_ratio));
2336     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46,bps_ratio));
2337     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2338     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2339     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2340     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2341     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2342     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2343     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2344     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2345     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2346     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2347     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2348     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75,bps_ratio));
2349     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2350     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2351     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2352     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34,bps_ratio));
2353     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2354     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2355     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2356     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2357     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2358
2359     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2360
2361     i965_gpe_context_unmap_curbe(gpe_context);
2362
2363     return;
2364 }
2365
2366 static void
2367 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2368                                      struct encode_state *encode_state,
2369                                      struct i965_gpe_context *gpe_context,
2370                                      struct intel_encoder_context *encoder_context,
2371                                      void * param_mbenc)
2372 {
2373     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2374     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2375
2376     gen9_add_buffer_gpe_surface(ctx,
2377                                 gpe_context,
2378                                 &avc_ctx->res_brc_history_buffer,
2379                                 0,
2380                                 avc_ctx->res_brc_history_buffer.size,
2381                                 0,
2382                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2383
2384     gen9_add_buffer_2d_gpe_surface(ctx,
2385                                    gpe_context,
2386                                    &avc_ctx->res_brc_dist_data_surface,
2387                                    1,
2388                                    I965_SURFACEFORMAT_R8_UNORM,
2389                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2390
2391     return;
2392 }
2393
2394 static VAStatus
2395 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2396                                struct encode_state *encode_state,
2397                                struct intel_encoder_context *encoder_context)
2398 {
2399     struct i965_driver_data *i965 = i965_driver_data(ctx);
2400     struct i965_gpe_table *gpe = &i965->gpe_table;
2401     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2402     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2403     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2404     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
2405
2406     struct i965_gpe_context *gpe_context;
2407     struct gpe_media_object_parameter media_object_param;
2408     struct gpe_media_object_inline_data media_object_inline_data;
2409     int media_function = 0;
2410     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2411
2412     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2413
2414     if(generic_state->brc_inited)
2415         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2416
2417     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2418
2419     gpe->context_init(ctx, gpe_context);
2420     gpe->reset_binding_table(ctx, gpe_context);
2421
2422     generic_ctx->pfn_set_curbe_brc_init_reset(ctx,encode_state,gpe_context,encoder_context,NULL);
2423
2424     generic_ctx->pfn_send_brc_init_reset_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
2425
2426     gpe->setup_interface_data(ctx, gpe_context);
2427
2428     memset(&media_object_param, 0, sizeof(media_object_param));
2429     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2430     media_object_param.pinline_data = &media_object_inline_data;
2431     media_object_param.inline_size = sizeof(media_object_inline_data);
2432
2433     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2434                                         gpe_context,
2435                                         media_function,
2436                                         &media_object_param);
2437
2438     return VA_STATUS_SUCCESS;
2439 }
2440
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    /* Fill the curbe for the frame-level BRC update kernel: target buffer
     * fullness, frame type, skip-frame accounting, min/max QP limits and
     * the AVBR gain/ratio thresholds.  NOTE: this function also advances
     * persistent BRC state (brc_init_current_target_buf_full_in_bits), so
     * it must run exactly once per frame. */
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Start from the static default curbe and patch the per-frame fields. */
    memcpy(cmd,&gen9_avc_frame_brc_update_curbe_init_data,sizeof(gen9_avc_frame_brc_update_curbe_data));

    cmd->dw5.target_size_flag = 0 ;
    if(generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits)
    {
        /* Target fullness overflowed the (virtual) buffer size: wrap it
         * around and report the overflow to the kernel via the flag. */
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    if(generic_state->skip_frame_enbale)
    {
        /* Report skipped frames and credit their nominal bit budget back
         * to the target buffer fullness. */
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header size is tracked in bytes; the kernel expects bits. */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref)?INTEL_ENCODE_BRCUPDATE_IS_REFERENCE:0 ;

    if(avc_state->multi_pre_enable)
    {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    if(avc_state->min_max_qp_enable)
    {
        /* Clamp QP per slice type when the app configured explicit limits. */
        switch(generic_state->frame_type)
        {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    }else
    {
        /* 0/0 means "no explicit QP clamping" for the kernel. */
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Advance the running target fullness by one frame's bit budget. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
    {
        /* AVBR only: gain-adjust start frames scaled by convergence, and
         * rate-ratio thresholds scaled by the configured accuracy. */
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    /* Derive the profile/level cap on per-frame size from the stream
     * parameters. */
    memset(&common_param,0,sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param,seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2557
2558 static void
2559 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2560                                        struct encode_state *encode_state,
2561                                        struct i965_gpe_context *gpe_context,
2562                                        struct intel_encoder_context *encoder_context,
2563                                        void * param_brc)
2564 {
2565     struct i965_driver_data *i965 = i965_driver_data(ctx);
2566     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2567     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2568     struct brc_param * param = (struct brc_param *)param_brc ;
2569     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2570     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
2571     unsigned char is_g95 = 0;
2572
2573     if (IS_SKL(i965->intel.device_info)||
2574         IS_BXT(i965->intel.device_info))
2575         is_g95 = 0;
2576     else if (IS_KBL(i965->intel.device_info)||
2577              IS_GLK(i965->intel.device_info))
2578              is_g95 = 1;
2579
2580     /* brc history buffer*/
2581     gen9_add_buffer_gpe_surface(ctx,
2582                                 gpe_context,
2583                                 &avc_ctx->res_brc_history_buffer,
2584                                 0,
2585                                 avc_ctx->res_brc_history_buffer.size,
2586                                 0,
2587                                 (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2588
2589     /* previous pak buffer*/
2590     gen9_add_buffer_gpe_surface(ctx,
2591                                 gpe_context,
2592                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2593                                 0,
2594                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2595                                 0,
2596                                 (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2597
2598     /* image state command buffer read only*/
2599     gen9_add_buffer_gpe_surface(ctx,
2600                                 gpe_context,
2601                                 &avc_ctx->res_brc_image_state_read_buffer,
2602                                 0,
2603                                 avc_ctx->res_brc_image_state_read_buffer.size,
2604                                 0,
2605                                 (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2606
2607     /* image state command buffer write only*/
2608     gen9_add_buffer_gpe_surface(ctx,
2609                                 gpe_context,
2610                                 &avc_ctx->res_brc_image_state_write_buffer,
2611                                 0,
2612                                 avc_ctx->res_brc_image_state_write_buffer.size,
2613                                 0,
2614                                 (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2615
2616     if(avc_state->mbenc_brc_buffer_size > 0)
2617     {
2618         gen9_add_buffer_gpe_surface(ctx,
2619                                     gpe_context,
2620                                     &(avc_ctx->res_mbenc_brc_buffer),
2621                                     0,
2622                                     avc_ctx->res_mbenc_brc_buffer.size,
2623                                     0,
2624                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2625     }
2626     else
2627     {
2628         /*  Mbenc curbe input buffer */
2629         gen9_add_dri_buffer_gpe_surface(ctx,
2630                                         gpe_context,
2631                                         gpe_context_mbenc->dynamic_state.bo,
2632                                         0,
2633                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2634                                         gpe_context_mbenc->curbe.offset,
2635                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2636         /* Mbenc curbe output buffer */
2637         gen9_add_dri_buffer_gpe_surface(ctx,
2638                                         gpe_context,
2639                                         gpe_context_mbenc->dynamic_state.bo,
2640                                         0,
2641                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2642                                         gpe_context_mbenc->curbe.offset,
2643                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2644     }
2645
2646     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2647     gen9_add_buffer_2d_gpe_surface(ctx,
2648                                    gpe_context,
2649                                    &avc_ctx->res_brc_dist_data_surface,
2650                                    1,
2651                                    I965_SURFACEFORMAT_R8_UNORM,
2652                                    (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2653
2654     /* BRC const data 2D surface buffer */
2655     gen9_add_buffer_2d_gpe_surface(ctx,
2656                                    gpe_context,
2657                                    &avc_ctx->res_brc_const_data_buffer,
2658                                    1,
2659                                    I965_SURFACEFORMAT_R8_UNORM,
2660                                    (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2661
2662     /* MB statistical data surface*/
2663     gen9_add_buffer_gpe_surface(ctx,
2664                                 gpe_context,
2665                                 &avc_ctx->res_mb_status_buffer,
2666                                 0,
2667                                 avc_ctx->res_mb_status_buffer.size,
2668                                 0,
2669                                 (is_g95?GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX:GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2670
2671     return;
2672 }
2673
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    /* Run the frame-level BRC update kernel.  The MBEnc curbe is set up
     * first (in the MBEnc GPE context) so the BRC kernel can patch it in
     * place through the curbe read/write surfaces bound later; gpe_context
     * is then re-pointed at the BRC kernel's own context. */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use,mb_qp_buffer_in_use;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0)?1:0;
    /* Dirty-ROI is deliberately forced off here via the trailing "&& (0)". */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Pick the MBEnc kernel variant for the configured quality/perf mode;
     * the I-frame kernel is the base, P/B are offset from it below. */
    switch(generic_state->kernel_mode)
    {
    case INTEL_ENC_KERNEL_NORMAL :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
    case INTEL_ENC_KERNEL_PERFORMANCE :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
    case INTEL_ENC_KERNEL_QUALITY :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
    default:
        assert(0);

    }

    if(generic_state->frame_type == SLICE_TYPE_P)
    {
        kernel_idx += 1;
    }
    else if(generic_state->frame_type == SLICE_TYPE_B)
    {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param,0,sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx,encode_state,gpe_context,encoder_context,&curbe_mbenc_param);

    // gen95 sets the MBEnc curbe outside of BRC; gen9 does it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    memset(&curbe_brc_param,0,sizeof(struct brc_param));
    /* Remember the MBEnc context before gpe_context is re-pointed at the
     * BRC kernel's context below. */
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx,encode_state,gpe_context,encoder_context,&curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if(avc_state->multi_pre_enable)
    {
        gen9_avc_init_brc_const_data(ctx,encode_state,encoder_context);
    }else
    {
        gen9_avc_init_brc_const_data_old(ctx,encode_state,encoder_context);
    }
    /* image state construct*/
    gen9_avc_set_image_state(ctx,encode_state,encoder_context,&(avc_ctx->res_brc_image_state_read_buffer));
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx,encode_state,gpe_context,encoder_context,&curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                        gpe_context,
                                        media_function,
                                        &media_object_param);

    return VA_STATUS_SUCCESS;
}
2803
2804 static void
2805 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2806                                  struct encode_state *encode_state,
2807                                  struct i965_gpe_context *gpe_context,
2808                                  struct intel_encoder_context *encoder_context,
2809                                  void * param)
2810 {
2811     gen9_avc_mb_brc_curbe_data *cmd;
2812     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2813     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2814
2815     cmd = i965_gpe_context_map_curbe(gpe_context);
2816
2817     if (!cmd)
2818         return;
2819
2820     memset(cmd,0,sizeof(gen9_avc_mb_brc_curbe_data));
2821
2822     cmd->dw0.cur_frame_type = generic_state->frame_type;
2823     if(generic_state->brc_roi_enable)
2824     {
2825         cmd->dw0.enable_roi = 1;
2826     }else
2827     {
2828         cmd->dw0.enable_roi = 0;
2829     }
2830
2831     i965_gpe_context_unmap_curbe(gpe_context);
2832
2833     return;
2834 }
2835
2836 static void
2837 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2838                                     struct encode_state *encode_state,
2839                                     struct i965_gpe_context *gpe_context,
2840                                     struct intel_encoder_context *encoder_context,
2841                                     void * param_mbenc)
2842 {
2843     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2844     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2845     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2846
2847     /* brc history buffer*/
2848     gen9_add_buffer_gpe_surface(ctx,
2849                                 gpe_context,
2850                                 &avc_ctx->res_brc_history_buffer,
2851                                 0,
2852                                 avc_ctx->res_brc_history_buffer.size,
2853                                 0,
2854                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2855
2856     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2857     if(generic_state->mb_brc_enabled)
2858     {
2859         gen9_add_buffer_2d_gpe_surface(ctx,
2860                                        gpe_context,
2861                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2862                                        1,
2863                                        I965_SURFACEFORMAT_R8_UNORM,
2864                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2865
2866     }
2867
2868     /* BRC roi feature*/
2869     if(generic_state->brc_roi_enable)
2870     {
2871         gen9_add_buffer_gpe_surface(ctx,
2872                                     gpe_context,
2873                                     &avc_ctx->res_mbbrc_roi_surface,
2874                                     0,
2875                                     avc_ctx->res_mbbrc_roi_surface.size,
2876                                     0,
2877                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2878
2879     }
2880
2881     /* MB statistical data surface*/
2882     gen9_add_buffer_gpe_surface(ctx,
2883                                 gpe_context,
2884                                 &avc_ctx->res_mb_status_buffer,
2885                                 0,
2886                                 avc_ctx->res_mb_status_buffer.size,
2887                                 0,
2888                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
2889
2890     return;
2891 }
2892
2893 static VAStatus
2894 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2895                               struct encode_state *encode_state,
2896                               struct intel_encoder_context *encoder_context)
2897
2898 {
2899     struct i965_driver_data *i965 = i965_driver_data(ctx);
2900     struct i965_gpe_table *gpe = &i965->gpe_table;
2901     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2902     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2903     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2904     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
2905
2906     struct i965_gpe_context *gpe_context;
2907     struct gpe_media_object_walker_parameter media_object_walker_param;
2908     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2909     int media_function = 0;
2910     int kernel_idx = 0;
2911
2912     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2913     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2914     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2915
2916     gpe->context_init(ctx, gpe_context);
2917     gpe->reset_binding_table(ctx, gpe_context);
2918
2919     /* set curbe brc mb update*/
2920     generic_ctx->pfn_set_curbe_brc_mb_update(ctx,encode_state,gpe_context,encoder_context,NULL);
2921
2922
2923     /* set surface brc mb update*/
2924     generic_ctx->pfn_send_brc_mb_update_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
2925
2926
2927     gpe->setup_interface_data(ctx, gpe_context);
2928
2929     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2930     /* the scaling is based on 8x8 blk level */
2931     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1)/2;
2932     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1)/2 ;
2933     kernel_walker_param.no_dependency = 1;
2934
2935     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2936
2937     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2938                                         gpe_context,
2939                                         media_function,
2940                                         &media_object_walker_param);
2941
2942     return VA_STATUS_SUCCESS;
2943 }
2944
/*
mbenc kernel related functions; this section also includes the intra distortion kernel
*/
2948 static int
2949 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2950 {
2951     int biweight = 32;      // default value
2952
2953     /* based on kernel HLD*/
2954     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT)
2955     {
2956         biweight = 32;
2957     }
2958     else
2959     {
2960         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2961
2962         if (biweight != 16 && biweight != 21 &&
2963             biweight != 32 && biweight != 43 && biweight != 48)
2964         {
2965             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
2966         }
2967     }
2968
2969     return biweight;
2970 }
2971
2972 static void
2973 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2974                                struct encode_state *encode_state,
2975                                struct intel_encoder_context *encoder_context)
2976 {
2977     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2978     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
2979     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2980     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
2981
2982     int max_num_references;
2983     VAPictureH264 *curr_pic;
2984     VAPictureH264 *ref_pic_l0;
2985     VAPictureH264 *ref_pic_l1;
2986     int i = 0;
2987     int tb = 0;
2988     int td = 0;
2989     int tx = 0;
2990     int tmp = 0;
2991     int poc0 = 0;
2992     int poc1 = 0;
2993
2994     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2995
2996     memset(avc_state->dist_scale_factor_list0,0,32*sizeof(unsigned int));
2997     curr_pic = &pic_param->CurrPic;
2998     for(i = 0; i < max_num_references; i++)
2999     {
3000         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3001
3002         if((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3003            (ref_pic_l0->picture_id == VA_INVALID_SURFACE) )
3004             break;
3005         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3006         if((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3007            (ref_pic_l0->picture_id == VA_INVALID_SURFACE) )
3008             break;
3009
3010         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3011         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3012         CLIP(poc0,-128,127);
3013         CLIP(poc1,-128,127);
3014         tb = poc0;
3015         td = poc1;
3016
3017         if(td == 0)
3018         {
3019             td = 1;
3020         }
3021         tmp = (td/2 > 0)?(td/2):(-(td/2));
3022         tx = (16384 + tmp)/td ;
3023         tmp = (tb*tx+32)>>6;
3024         CLIP(tmp,-1024,1023);
3025         avc_state->dist_scale_factor_list0[i] = tmp;
3026     }
3027     return;
3028 }
3029
3030 static unsigned int
3031 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3032                               VAEncSliceParameterBufferH264 *slice_param,
3033                               int list,
3034                               int ref_frame_idx)
3035 {
3036     struct i965_driver_data *i965 = i965_driver_data(ctx);
3037     struct object_surface *obj_surface;
3038     struct gen9_surface_avc *avc_priv_surface;
3039     VASurfaceID surface_id;
3040
3041     assert(slice_param);
3042     assert(list < 2);
3043
3044     if(list == 0)
3045     {
3046         if(ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3047             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3048         else
3049             return 0;
3050     }else
3051     {
3052         if(ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3053             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3054         else
3055             return 0;
3056     }
3057     obj_surface = SURFACE(surface_id);
3058     if(obj_surface && obj_surface->private_data)
3059     {
3060         avc_priv_surface = obj_surface->private_data;
3061         return avc_priv_surface->qp_value;
3062     }else
3063     {
3064         return 0;
3065     }
3066 }
3067
/* Fill the MB BRC constants buffer consumed by the MB BRC update kernel.
 * The buffer is laid out as one 16-dword row per QP value (16 * 52 dwords
 * total); the base content comes from the per-slice-type table
 * gen9_avc_mb_brc_const_data, after which individual dwords in each row
 * are overridden according to the currently enabled encoder features. */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data =NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;  /* 16 dwords per QP row, 52 QP rows (QP 0..51) */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Pick the constant table matching the slice type (I/P/B). */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data,gen9_avc_mb_brc_const_data[table_idx][0],size*sizeof(unsigned int));

    /* Remember the buffer start: the slice-type-specific pass below walks
     * the rows, then the common pass re-walks them from the beginning. */
    data_tmp = data;

    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_I:
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            /* dword 0: legacy intra mode cost, when the old table is enabled */
            if(avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;  /* advance to the next QP's row */
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for(i = 0; i < AVC_QP_MAX ; i++)
        {
            /* dword 3: MV cost adjusted for P-skip bias (P slices only) */
            if(generic_state->frame_type == SLICE_TYPE_P)
            {
                if(avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: non-FTQ skip threshold — from the app-supplied LUT
             * when provided, otherwise from the per-slice-type default table */
            if(avc_state->non_ftq_skip_threshold_lut_input_enable)
            {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
            }else if(generic_state->frame_type == SLICE_TYPE_P)
            {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            }else
            {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra cost scaling factor (adaptive vs. fixed) */
            if(avc_state->adaptive_intra_scaling_enable)
            {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            }else
            {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass over all rows: overrides common to every slice type. */
    data = data_tmp;
    for(i = 0; i < AVC_QP_MAX ; i++)
    {
        /* dwords 6-7: app-supplied FTQ skip thresholds, replicated into
         * the byte lanes the kernel expects */
        if(avc_state->ftq_skip_threshold_lut_input_enable)
        {
            *(data + 6) =  (avc_state->ftq_skip_threshold_lut[i] |
                (avc_state->ftq_skip_threshold_lut[i] <<16) |
                (avc_state->ftq_skip_threshold_lut[i] <<24) );
            *(data + 7) =  (avc_state->ftq_skip_threshold_lut[i] |
                (avc_state->ftq_skip_threshold_lut[i] <<8) |
                (avc_state->ftq_skip_threshold_lut[i] <<16) |
                (avc_state->ftq_skip_threshold_lut[i] <<24) );
        }

        /* dwords 11-12: trellis quantization lambda values (intra/inter) */
        if(avc_state->kernel_trellis_enable)
        {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3169
3170 static void
3171 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3172                          struct encode_state *encode_state,
3173                          struct i965_gpe_context *gpe_context,
3174                          struct intel_encoder_context *encoder_context,
3175                          void * param)
3176 {
3177     struct i965_driver_data *i965 = i965_driver_data(ctx);
3178     union { gen9_avc_mbenc_curbe_data *g9; gen95_avc_mbenc_curbe_data *g95;} cmd;
3179     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3180     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
3181     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
3182
3183     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3184     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3185     VASurfaceID surface_id;
3186     struct object_surface *obj_surface;
3187
3188     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3189     unsigned char qp = 0;
3190     unsigned char me_method = 0;
3191     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3192     unsigned int table_idx = 0;
3193     unsigned char is_g9 = 0;
3194     unsigned char is_g95 = 0;
3195     unsigned int curbe_size = 0;
3196
3197     unsigned int preset = generic_state->preset;
3198     if (IS_SKL(i965->intel.device_info)||
3199         IS_BXT(i965->intel.device_info))
3200     {
3201         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3202         if(!cmd.g9)
3203             return;
3204         is_g9 = 1;
3205         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3206         memset(cmd.g9,0,curbe_size);
3207
3208         if(mbenc_i_frame_dist_in_use)
3209         {
3210             memcpy(cmd.g9,gen9_avc_mbenc_curbe_i_frame_dist_init_data,curbe_size);
3211
3212         }else
3213         {
3214             switch(generic_state->frame_type)
3215             {
3216             case SLICE_TYPE_I:
3217                 memcpy(cmd.g9,gen9_avc_mbenc_curbe_normal_i_frame_init_data,curbe_size);
3218                 break;
3219             case SLICE_TYPE_P:
3220                 memcpy(cmd.g9,gen9_avc_mbenc_curbe_normal_p_frame_init_data,curbe_size);
3221                 break;
3222             case SLICE_TYPE_B:
3223                 memcpy(cmd.g9,gen9_avc_mbenc_curbe_normal_b_frame_init_data,curbe_size);
3224                 break;
3225             default:
3226                 assert(0);
3227             }
3228
3229         }
3230     }
3231     else if (IS_KBL(i965->intel.device_info)||
3232              IS_GLK(i965->intel.device_info))
3233     {
3234         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3235         if(!cmd.g95)
3236             return;
3237         is_g95 = 1;
3238         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3239         memset(cmd.g9,0,curbe_size);
3240
3241         if(mbenc_i_frame_dist_in_use)
3242         {
3243             memcpy(cmd.g95,gen95_avc_mbenc_curbe_i_frame_dist_init_data,curbe_size);
3244
3245         }else
3246         {
3247             switch(generic_state->frame_type)
3248             {
3249             case SLICE_TYPE_I:
3250                 memcpy(cmd.g95,gen95_avc_mbenc_curbe_normal_i_frame_init_data,curbe_size);
3251                 break;
3252             case SLICE_TYPE_P:
3253                 memcpy(cmd.g95,gen95_avc_mbenc_curbe_normal_p_frame_init_data,curbe_size);
3254                 break;
3255             case SLICE_TYPE_B:
3256                 memcpy(cmd.g95,gen95_avc_mbenc_curbe_normal_b_frame_init_data,curbe_size);
3257                 break;
3258             default:
3259                 assert(0);
3260             }
3261
3262         }
3263     }
3264
3265     me_method = (generic_state->frame_type == SLICE_TYPE_B)? gen9_avc_b_me_method[preset]:gen9_avc_p_me_method[preset];
3266     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3267
3268     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3269     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3270     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3271     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3272
3273     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3274     cmd.g9->dw38.max_len_sp = 0;
3275
3276     if(is_g95)
3277         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3278
3279     cmd.g9->dw3.src_access = 0;
3280     cmd.g9->dw3.ref_access = 0;
3281
3282     if(avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I))
3283     {
3284         //disable ftq_override by now.
3285         if(avc_state->ftq_override)
3286         {
3287             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3288
3289         }else
3290         {
3291             // both gen9 and gen95 come here by now
3292             if(generic_state->frame_type == SLICE_TYPE_P)
3293             {
3294                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3295
3296             }else
3297             {
3298                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3299             }
3300         }
3301     }else
3302     {
3303         cmd.g9->dw3.ftq_enable = 0;
3304     }
3305
3306     if(avc_state->disable_sub_mb_partion)
3307         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3308
3309     if(mbenc_i_frame_dist_in_use)
3310     {
3311         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3312         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3313         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1)/4;
3314         cmd.g9->dw6.batch_buffer_end = 0;
3315         cmd.g9->dw31.intra_compute_type = 1;
3316
3317     }else
3318     {
3319         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3320         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3321         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice)?generic_state->frame_height_in_mbs:avc_state->slice_height;
3322
3323         {
3324             memcpy(&(cmd.g9->dw8),gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp],8*sizeof(unsigned int));
3325             if((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable)
3326             {
3327                 //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3328             }else if(avc_state->skip_bias_adjustment_enable)
3329             {
3330                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3331                 // No need to check for P picture as the flag is only enabled for P picture */
3332                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3333
3334             }
3335         }
3336
3337         table_idx = (generic_state->frame_type == SLICE_TYPE_B)?1:0;
3338         memcpy(&(cmd.g9->dw16),table_enc_search_path[table_idx][me_method],16*sizeof(unsigned int));
3339     }
3340     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3341     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3342     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3343     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3344     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3345     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3346     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3347     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3348
3349
3350     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable?0:0x02;
3351     cmd.g9->dw7.src_field_polarity = 0;//field related
3352
3353     /*ftq_skip_threshold_lut set,dw14 /15*/
3354
3355     /*r5 disable NonFTQSkipThresholdLUT*/
3356     if(generic_state->frame_type == SLICE_TYPE_P)
3357     {
3358         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3359
3360     }else if(generic_state->frame_type == SLICE_TYPE_B)
3361     {
3362         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3363
3364     }
3365
3366     cmd.g9->dw13.qp_prime_y = qp;
3367     cmd.g9->dw13.qp_prime_cb = qp;
3368     cmd.g9->dw13.qp_prime_cr = qp;
3369     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3370
3371     if((generic_state->frame_type != SLICE_TYPE_I)&& avc_state->multi_pre_enable)
3372     {
3373         switch(gen9_avc_multi_pred[preset])
3374         {
3375         case 0:
3376             cmd.g9->dw32.mult_pred_l0_disable = 128;
3377             cmd.g9->dw32.mult_pred_l1_disable = 128;
3378             break;
3379         case 1:
3380             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P)?1:128;
3381             cmd.g9->dw32.mult_pred_l1_disable = 128;
3382             break;
3383         case 2:
3384             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
3385             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
3386             break;
3387         case 3:
3388             cmd.g9->dw32.mult_pred_l0_disable = 1;
3389             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
3390             break;
3391
3392         }
3393
3394     }else
3395     {
3396         cmd.g9->dw32.mult_pred_l0_disable = 128;
3397         cmd.g9->dw32.mult_pred_l1_disable = 128;
3398     }
3399
3400     /*field setting for dw33 34, ignored*/
3401
3402     if(avc_state->adaptive_transform_decision_enable)
3403     {
3404         if(generic_state->frame_type != SLICE_TYPE_I)
3405         {
3406             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3407             if(is_g95)
3408             {
3409                 cmd.g95->dw60.mb_texture_threshold = 1024;
3410                 cmd.g95->dw60.tx_decision_threshold = 128;
3411             }
3412
3413         }
3414
3415         if(is_g9)
3416         {
3417             cmd.g9->dw58.mb_texture_threshold = 1024;
3418             cmd.g9->dw58.tx_decision_threshold = 128;
3419         }
3420     }
3421
3422
3423     if(generic_state->frame_type == SLICE_TYPE_B)
3424     {
3425         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3426         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3427         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3428     }
3429
3430     cmd.g9->dw34.b_original_bff = 0; //frame only
3431     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3432     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3433     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3434     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3435     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3436     if(is_g95)
3437     {
3438         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3439         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3440     }
3441
3442     if(is_g9)
3443     {
3444         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3445
3446         if(cmd.g9->dw34.force_non_skip_check)
3447         {
3448             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3449         }
3450     }
3451
3452
3453     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3454     cmd.g9->dw38.ref_threshold = 400;
3455     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B)?gen9_avc_hme_b_combine_len[preset]:gen9_avc_hme_combine_len[preset];
3456
3457     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3458        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3459        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3460     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled)?0:2;
3461
3462     if(mbenc_i_frame_dist_in_use)
3463     {
3464         cmd.g9->dw13.qp_prime_y = 0;
3465         cmd.g9->dw13.qp_prime_cb = 0;
3466         cmd.g9->dw13.qp_prime_cr = 0;
3467         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3468         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3469         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3470
3471     }
3472     if(cmd.g9->dw4.use_actual_ref_qp_value)
3473     {
3474         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,0);
3475         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,1);
3476         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,2);
3477         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,3);
3478         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,4);
3479         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,5);
3480         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,6);
3481         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,7);
3482         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,0);
3483         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,1);
3484     }
3485
3486     table_idx = slice_type_kernel[generic_state->frame_type];
3487     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3488
3489     if(generic_state->frame_type == SLICE_TYPE_I)
3490     {
3491         cmd.g9->dw0.skip_mode_enable = 0;
3492         cmd.g9->dw37.skip_mode_enable = 0;
3493         cmd.g9->dw36.hme_combine_overlap = 0;
3494         cmd.g9->dw47.intra_cost_sf = 16;
3495         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3496         if(is_g9)
3497             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3498
3499     }else if(generic_state->frame_type == SLICE_TYPE_P)
3500     {
3501         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
3502         cmd.g9->dw3.bme_disable_fbr = 1;
3503         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3504         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3505         cmd.g9->dw7.non_skip_zmv_added = 1;
3506         cmd.g9->dw7.non_skip_mode_added = 1;
3507         cmd.g9->dw7.skip_center_mask = 1;
3508         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
3509         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3510         cmd.g9->dw36.hme_combine_overlap = 1;
3511         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
3512         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3513         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3514         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3515         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3516         if(is_g9 && avc_state->global_motion_bias_adjustment_enable)
3517             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3518
3519     }else
3520     {
3521         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
3522         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3523         cmd.g9->dw3.search_ctrl = 7;
3524         cmd.g9->dw3.skip_type = 1;
3525         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3526         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3527         cmd.g9->dw7.skip_center_mask = 0xff;
3528         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
3529         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3530         cmd.g9->dw36.hme_combine_overlap = 1;
3531         surface_id = slice_param->RefPicList1[0].picture_id;
3532         obj_surface = SURFACE(surface_id);
3533         if (!obj_surface)
3534         {
3535             WARN_ONCE("Invalid backward reference frame\n");
3536             return;
3537         }
3538         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!( slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3539
3540         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
3541         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l1_active_minus1:0;
3542         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3543         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3544         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3545         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3546         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3547         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3548         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3549         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3550         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3551         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3552
3553         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3554         if(cmd.g9->dw34.enable_direct_bias_adjustment)
3555         {
3556             cmd.g9->dw7.non_skip_zmv_added = 1;
3557             cmd.g9->dw7.non_skip_mode_added = 1;
3558         }
3559
3560         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3561         if(is_g9 && avc_state->global_motion_bias_adjustment_enable)
3562             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3563
3564     }
3565
3566     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3567
3568     if(avc_state->rolling_intra_refresh_enable)
3569     {
3570         /*by now disable it*/
3571         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3572         cmd.g9->dw32.mult_pred_l0_disable = 128;
3573         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3574          across one P frame to another P frame, as needed by the RollingI algo */
3575         if(is_g9)
3576         {
3577             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3578             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3579             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3580         }
3581
3582         if(is_g95)
3583         {
3584             if(avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled)
3585             {
3586                 cmd.g95->dw4.enable_intra_refresh = 0;
3587                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3588                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3589                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3590             }else
3591             {
3592                 cmd.g95->dw4.enable_intra_refresh = 1;
3593                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3594                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3595                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3596                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3597                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3598             }
3599         }
3600
3601     }else
3602     {
3603         cmd.g9->dw34.widi_intra_refresh_en = 0;
3604     }
3605
3606     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3607     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3608
3609     /*roi set disable by now. 49-56*/
3610     if(curbe_param->roi_enabled)
3611     {
3612         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3613         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3614         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3615         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3616
3617         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3618         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3619         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3620         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3621
3622         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3623         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3624         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3625         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3626
3627         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3628         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3629         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3630         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3631
3632         if(!generic_state->brc_enabled)
3633         {
3634             char tmp = 0;
3635             tmp = generic_state->roi[0].value;
3636             CLIP(tmp,-qp,AVC_QP_MAX-qp);
3637             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3638             tmp = generic_state->roi[1].value;
3639             CLIP(tmp,-qp,AVC_QP_MAX-qp);
3640             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3641             tmp = generic_state->roi[2].value;
3642             CLIP(tmp,-qp,AVC_QP_MAX-qp);
3643             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3644             tmp = generic_state->roi[3].value;
3645             CLIP(tmp,-qp,AVC_QP_MAX-qp);
3646             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3647         }else
3648         {
3649             cmd.g9->dw34.roi_enable_flag = 0;
3650         }
3651     }
3652
3653     if(is_g95)
3654     {
3655         if(avc_state->tq_enable)
3656         {
3657             if(generic_state->frame_type == SLICE_TYPE_I)
3658             {
3659                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3660                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3661
3662             }else if(generic_state->frame_type == SLICE_TYPE_P)
3663             {
3664                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3665                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3666
3667             }else
3668             {
3669                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3670                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3671             }
3672
3673             if(cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3674                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3675
3676             if(cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3677                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3678
3679             if(cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3680                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3681
3682             if(cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3683                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3684         }
3685     }
3686
3687     if(is_g95)
3688     {
3689         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3690         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3691         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3692         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3693         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3694         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3695         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3696         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3697         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3698         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3699         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3700         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3701         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3702         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3703         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3704         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3705         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3706         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3707         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3708         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3709         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3710         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3711     }
3712
3713     if(is_g9)
3714     {
3715         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3716         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3717         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3718         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3719         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3720         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3721         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3722         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3723         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3724         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3725         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3726         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3727         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3728         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3729         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3730         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3731         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3732         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3733         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3734         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3735         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3736         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3737     }
3738
3739     i965_gpe_context_unmap_curbe(gpe_context);
3740
3741     return;
3742 }
3743
/*
 * Bind every input/output surface required by the AVC MBEnc (macroblock
 * encoding) GPU kernel into the binding table of @gpe_context.
 *
 * @param_mbenc is really a struct mbenc_param that flags which optional
 * surfaces are in use for this pass (I-frame distortion mode, MB BRC
 * constant data, per-MB QP, MAD, VPROC MB stats, ...).
 *
 * The function returns early (leaving the remaining surfaces unbound) if a
 * required reconstructed or scaled surface is missing; callers get no error
 * indication (void return).
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
     VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
        generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Gen9 (SKL/BXT) vs Gen9.5 (KBL/GLK): a few surfaces (non-skip MB map,
     * SFD cost table) live at different binding table indices. */
    if (IS_SKL(i965->intel.device_info)||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info)||
             IS_GLK(i965->intel.device_info))
             is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /*pak obj command buffer output: 16 DWs (16*4 bytes) per MB*/
    size = frame_mb_size * 16 * 4;
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /*mv data buffer output: 32 DWs (32*4 bytes) per MB*/
    size = frame_mb_size * 32 * 4;
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    gen9_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /*input current YUV surface, current input Y/UV object.
     *In I-frame distortion mode the kernel runs on the 4x downscaled
     *surface hanging off the reconstructed object instead of the full
     *resolution input. */
    if(mbenc_i_frame_dist_in_use)
    {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    }else
    {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    gen9_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if(generic_state->hme_enabled)
    {
        /*memv input 4x: motion vectors from the 4x HME pass*/
        gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
        /* memv distortion input*/
        gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
    }

    /*mbbrc const data_buffer: 16 DWs per QP value (AVC_QP_MAX entries)*/
    if(param->mb_const_data_buffer_in_use)
    {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /*mb qp data_buffer: app-provided per-MB QP map, or the one produced by
     *the MB BRC kernel, depending on mb_qp_data_enable*/
    if(param->mb_qp_buffer_in_use)
    {
        if(avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /*input current YUV surface as the VME source picture (IDX_0)*/
    if(mbenc_i_frame_dist_in_use)
    {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    }else
    {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /*input ref YUV surface: L0 references are interleaved at the odd
     *binding slots after IDX_0 (IDX_0 + 2*i + 1)*/
    for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
    {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 1);
    }
    /*input current YUV surface again as the second VME source (IDX_1),
     *used together with the L1 reference list*/
    if(mbenc_i_frame_dist_in_use)
    {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    }else
    {
        obj_surface = encode_state->input_yuv_object;
    }
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
    {
        if(i > 0) break;// only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1);
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 2);
        if(i == 0)
        {
            /* For B frames the first L1 ref also supplies its MB code and
             * MV buffers as the "forward frame" data inputs. */
            avc_priv_surface = obj_surface->private_data;
            /*pak obj command buffer output(mb code)*/
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /*mv data buffer output*/
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            gen9_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if( i < INTEL_AVC_MAX_BWD_REF_NUM)
        {
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer for I frame*/
    if(mbenc_i_frame_dist_in_use)
    {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* as ref frame ,update later RefPicSelect of Current Picture*/
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if(avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref)
    {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }

    /* The MB_STATS slot is shared: VPROC stats take priority, otherwise the
     * flatness check surface (if enabled) is bound there. */
    if(param->mb_vproc_stats_enable)
    {
        /*mb status buffer input: 16 DWs per MB*/
        size = frame_mb_size * 16 * 4;
        gpe_resource = &(avc_ctx->res_mb_status_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MB_STATS_INDEX);

    }else if(avc_state->flatness_check_enable)
    {

        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if(param->mad_enable)
    {
        /*mad buffer input: single DW accumulator, cleared before the run*/
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
    if(avc_state->mbenc_brc_buffer_size > 0)
    {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        gen9_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /*arbitrary number of MBs per slice: per-MB slice map input*/
    if(avc_state->arbitrary_num_mbs_in_slice)
    {
        /*slice surface input*/
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
    }

    /* Surfaces only meaningful for the normal (non I-frame-distortion) pass:
     * the forced non-skip MB map and the static-frame-detection cost table. */
    if(!mbenc_i_frame_dist_in_use)
    {
        if(avc_state->mb_disable_skip_map_enable)
        {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95?GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX:GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }

        if(avc_state->sfd_enable && generic_state->hme_enabled)
        {
            if(generic_state->frame_type == SLICE_TYPE_P)
            {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);

            }else if(generic_state->frame_type == SLICE_TYPE_B)
            {
                gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
            }

            /* SFD cost table only applies to inter frames */
            if(generic_state->frame_type != SLICE_TYPE_I)
            {
                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                               gpe_resource,
                                               1,
                                               I965_SURFACEFORMAT_R8_UNORM,
                                               (is_g95?GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX:GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
            }
        }
    }

    return;
}
4099
4100 static VAStatus
4101 gen9_avc_kernel_mbenc(VADriverContextP ctx,
4102                       struct encode_state *encode_state,
4103                       struct intel_encoder_context *encoder_context,
4104                       bool i_frame_dist_in_use)
4105 {
4106     struct i965_driver_data *i965 = i965_driver_data(ctx);
4107     struct i965_gpe_table *gpe = &i965->gpe_table;
4108     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4109     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4110     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4111     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4112     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4113
4114     struct i965_gpe_context *gpe_context;
4115     struct gpe_media_object_walker_parameter media_object_walker_param;
4116     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4117     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4118     int media_function = 0;
4119     int kernel_idx = 0;
4120     unsigned int mb_const_data_buffer_in_use = 0;
4121     unsigned int mb_qp_buffer_in_use = 0;
4122     unsigned int brc_enabled = 0;
4123     unsigned int roi_enable = (generic_state->num_roi > 0)?1:0;
4124     unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
4125     struct mbenc_param param ;
4126
4127     int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
4128     int mad_enable = 0;
4129     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4130
4131     mb_const_data_buffer_in_use =
4132         generic_state->mb_brc_enabled ||
4133         roi_enable ||
4134         dirty_roi_enable ||
4135         avc_state->mb_qp_data_enable ||
4136         avc_state->rolling_intra_refresh_enable;
4137     mb_qp_buffer_in_use =
4138         generic_state->mb_brc_enabled ||
4139         generic_state->brc_roi_enable ||
4140         avc_state->mb_qp_data_enable;
4141
4142     if(mbenc_i_frame_dist_in_use)
4143     {
4144         media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
4145         kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
4146         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
4147         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
4148         mad_enable = 0;
4149         brc_enabled = 0;
4150
4151         gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
4152     }else
4153     {
4154         switch(generic_state->kernel_mode)
4155         {
4156         case INTEL_ENC_KERNEL_NORMAL :
4157             {
4158                 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
4159                 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
4160                 break;
4161             }
4162         case INTEL_ENC_KERNEL_PERFORMANCE :
4163             {
4164                 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
4165                 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
4166                 break;
4167             }
4168         case INTEL_ENC_KERNEL_QUALITY :
4169             {
4170                 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
4171                 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
4172                 break;
4173             }
4174         default:
4175             assert(0);
4176
4177         }
4178
4179         if(generic_state->frame_type == SLICE_TYPE_P)
4180         {
4181            kernel_idx += 1;
4182         }
4183         else if(generic_state->frame_type == SLICE_TYPE_B)
4184         {
4185            kernel_idx += 2;
4186         }
4187
4188         downscaled_width_in_mb = generic_state->frame_width_in_mbs;
4189         downscaled_height_in_mb = generic_state->frame_height_in_mbs;
4190         mad_enable = avc_state->mad_enable;
4191         brc_enabled = generic_state->brc_enabled;
4192
4193         gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
4194     }
4195
4196     memset(&param,0,sizeof(struct mbenc_param));
4197
4198     param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
4199     param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
4200     param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
4201     param.mad_enable = mad_enable;
4202     param.brc_enabled = brc_enabled;
4203     param.roi_enabled = roi_enable;
4204
4205     if(avc_state->mb_status_supported)
4206     {
4207         param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
4208     }
4209
4210     if(!avc_state->mbenc_curbe_set_in_brc_update)
4211     {
4212         gpe->context_init(ctx, gpe_context);
4213     }
4214
4215     gpe->reset_binding_table(ctx, gpe_context);
4216
4217     if(!avc_state->mbenc_curbe_set_in_brc_update)
4218     {
4219         /*set curbe here*/
4220         generic_ctx->pfn_set_curbe_mbenc(ctx,encode_state,gpe_context,encoder_context,&param);
4221     }
4222
4223     /* MB brc const data buffer set up*/
4224     if(mb_const_data_buffer_in_use)
4225     {
4226         // caculate the lambda table, it is kernel controlled trellis quantization,gen95+
4227         if(avc_state->lambda_table_enable)
4228             gen95_avc_calc_lambda_table(ctx,encode_state,encoder_context);
4229
4230         gen9_avc_load_mb_brc_const_data(ctx,encode_state,encoder_context);
4231     }
4232
4233     /*clear the mad buffer*/
4234     if(mad_enable)
4235     {
4236         i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
4237     }
4238     /*send surface*/
4239     generic_ctx->pfn_send_mbenc_surface(ctx,encode_state,gpe_context,encoder_context,&param);
4240
4241     gpe->setup_interface_data(ctx, gpe_context);
4242
4243     /*walker setting*/
4244     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4245
4246     kernel_walker_param.use_scoreboard = 1;
4247     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4248     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4249     if(mbenc_i_frame_dist_in_use)
4250     {
4251         kernel_walker_param.no_dependency = 1;
4252     }else
4253     {
4254         switch(generic_state->frame_type)
4255         {
4256         case SLICE_TYPE_I:
4257             kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4258             break;
4259         case SLICE_TYPE_P:
4260             kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4261             break;
4262         case SLICE_TYPE_B:
4263             kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4264             if(!slice_param->direct_spatial_mv_pred_flag)
4265             {
4266                 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4267             }
4268             break;
4269         default:
4270             assert(0);
4271         }
4272         kernel_walker_param.no_dependency = 0;
4273     }
4274
4275     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4276
4277     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4278                                         gpe_context,
4279                                         media_function,
4280                                         &media_object_walker_param);
4281     return VA_STATUS_SUCCESS;
4282 }
4283
4284 /*
4285 me kernle related function
4286 */
4287 static void
4288 gen9_avc_set_curbe_me(VADriverContextP ctx,
4289                       struct encode_state *encode_state,
4290                       struct i965_gpe_context *gpe_context,
4291                       struct intel_encoder_context *encoder_context,
4292                       void * param)
4293 {
4294     gen9_avc_me_curbe_data *curbe_cmd;
4295     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4296     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4297     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4298
4299     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4300
4301     struct me_param * curbe_param = (struct me_param *)param ;
4302     unsigned char  use_mv_from_prev_step = 0;
4303     unsigned char write_distortions = 0;
4304     unsigned char qp_prime_y = 0;
4305     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4306     unsigned char seach_table_idx = 0;
4307     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4308     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4309     unsigned int scale_factor = 0;
4310
4311     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4312     switch(curbe_param->hme_type)
4313     {
4314     case INTEL_ENC_HME_4x :
4315         {
4316             use_mv_from_prev_step = (generic_state->b16xme_enabled)? 1:0;
4317             write_distortions = 1;
4318             mv_shift_factor = 2;
4319             scale_factor = 4;
4320             prev_mv_read_pos_factor = 0;
4321             break;
4322         }
4323     case INTEL_ENC_HME_16x :
4324         {
4325             use_mv_from_prev_step = (generic_state->b32xme_enabled)? 1:0;
4326             write_distortions = 0;
4327             mv_shift_factor = 2;
4328             scale_factor = 16;
4329             prev_mv_read_pos_factor = 1;
4330             break;
4331         }
4332     case INTEL_ENC_HME_32x :
4333         {
4334             use_mv_from_prev_step = 0;
4335             write_distortions = 0;
4336             mv_shift_factor = 1;
4337             scale_factor = 32;
4338             prev_mv_read_pos_factor = 0;
4339             break;
4340         }
4341     default:
4342         assert(0);
4343
4344     }
4345     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4346
4347     if (!curbe_cmd)
4348         return;
4349
4350     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16;
4351     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16;
4352
4353     memcpy(curbe_cmd,gen9_avc_me_curbe_init_data,sizeof(gen9_avc_me_curbe_data));
4354
4355     curbe_cmd->dw3.sub_pel_mode = 3;
4356     if(avc_state->field_scaling_output_interleaved)
4357     {
4358         /*frame set to zero,field specified*/
4359         curbe_cmd->dw3.src_access = 0;
4360         curbe_cmd->dw3.ref_access = 0;
4361         curbe_cmd->dw7.src_field_polarity = 0;
4362     }
4363     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4364     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4365     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4366
4367     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4368     curbe_cmd->dw6.write_distortions = write_distortions;
4369     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4370     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4371
4372     if(generic_state->frame_type == SLICE_TYPE_B)
4373     {
4374         curbe_cmd->dw1.bi_weight = 32;
4375         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4376         me_method = gen9_avc_b_me_method[generic_state->preset];
4377         seach_table_idx = 1;
4378     }
4379
4380     if(generic_state->frame_type == SLICE_TYPE_P ||
4381        generic_state->frame_type == SLICE_TYPE_B )
4382        curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4383
4384     curbe_cmd->dw13.ref_streamin_cost = 5;
4385     curbe_cmd->dw13.roi_enable = 0;
4386
4387     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4388     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4389
4390     memcpy(&curbe_cmd->dw16,table_enc_search_path[seach_table_idx][me_method],14*sizeof(int));
4391
4392     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4393     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x)? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX:GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4394     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4395     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4396     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4397     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4398     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4399
4400     i965_gpe_context_unmap_curbe(gpe_context);
4401     return;
4402 }
4403
/*
 * Bind the input/output surfaces for one HME (hierarchical motion
 * estimation) pass into the ME kernel's binding table.
 *
 * The surface set depends on curbe_param->hme_type:
 *   - INTEL_ENC_HME_4x : writes 4x MV data, BRC distortion and ME
 *     distortion; optionally reads 16x MV data as a predictor; reads the
 *     4x-downscaled current and reference pictures.
 *   - INTEL_ENC_HME_16x: writes 16x MV data; optionally reads 32x MV
 *     data; reads the 16x-downscaled pictures.
 *   - INTEL_ENC_HME_32x: writes 32x MV data; reads the 32x-downscaled
 *     pictures (top of the hierarchy, no predictor input).
 *
 * All downscaled pictures are stored in the private data of the
 * reconstructed surface object (struct gen9_surface_avc).
 */
static void
gen9_avc_send_surface_me(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct object_surface *obj_surface, *input_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct me_param * curbe_param = (struct me_param *)param ;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    int i = 0;

    /* all scaled input surface stored in reconstructed_object*/
    obj_surface = encode_state->reconstructed_object;
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;


    switch(curbe_param->hme_type)
    {
    case INTEL_ENC_HME_4x :
        {
            /*memv output 4x*/
            gpe_resource = &avc_ctx->s4x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

            /* memv input 16x: only bound when the 16x pass runs first and
             * its MVs are used to seed the 4x search */
            if(generic_state->b16xme_enabled)
            {
                gpe_resource = &avc_ctx->s16x_memv_data_buffer;
                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                               gpe_resource,
                                               1,
                                               I965_SURFACEFORMAT_R8_UNORM,
                                               GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
            }
            /* brc distortion  output*/
            gpe_resource = &avc_ctx->res_brc_dist_data_surface;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_ME_BRC_DISTORTION_INDEX);
           /* memv distortion output*/
            gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
            /*input current down scaled YUV surface*/
            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_4x_surface_obj;
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
            /*input ref scaled YUV surface*/
            for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList0[i].picture_id;
                obj_surface = SURFACE(surface_id);
                /* stop at the first invalid entry; remaining slots stay unbound */
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_4x_surface_obj;

                /* L0 reference slots are interleaved: fwd base + i*2 + 1 */
                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
            }

            /* current picture again, this time as the base of the backward
             * (L1) slot group; the L1 loop below breaks immediately for P
             * slices whose RefPicList1 has no valid surface */
            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_4x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

            for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList1[i].picture_id;
                obj_surface = SURFACE(surface_id);
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_4x_surface_obj;

                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
            }
            break;

        }
    case INTEL_ENC_HME_16x :
        {
            /* memv output 16x */
            gpe_resource = &avc_ctx->s16x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

            /* memv input 32x, when the 32x pass seeds this one */
            if(generic_state->b32xme_enabled)
            {
                gpe_resource = &avc_ctx->s32x_memv_data_buffer;
                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                               gpe_resource,
                                               1,
                                               I965_SURFACEFORMAT_R8_UNORM,
                                               GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
            }

            /* current and reference pictures, 16x-downscaled */
            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_16x_surface_obj;
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

            for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList0[i].picture_id;
                obj_surface = SURFACE(surface_id);
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_16x_surface_obj;

                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
            }

            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_16x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

            for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList1[i].picture_id;
                obj_surface = SURFACE(surface_id);
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_16x_surface_obj;

                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
            }
            break;
        }
    case INTEL_ENC_HME_32x :
        {
            /* memv output 32x; no coarser predictor exists at this level */
            gpe_resource = &avc_ctx->s32x_memv_data_buffer;
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);

            /* current and reference pictures, 32x-downscaled */
            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_32x_surface_obj;
            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);

            for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList0[i].picture_id;
                obj_surface = SURFACE(surface_id);
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_32x_surface_obj;

                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
            }

            obj_surface = encode_state->reconstructed_object;
            avc_priv_surface = obj_surface->private_data;
            input_surface = avc_priv_surface->scaled_32x_surface_obj;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     input_surface,
                                     GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);

            for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
            {
                surface_id = slice_param->RefPicList1[i].picture_id;
                obj_surface = SURFACE(surface_id);
                if (!obj_surface || !obj_surface->private_data)
                    break;
                avc_priv_surface = obj_surface->private_data;

                input_surface = avc_priv_surface->scaled_32x_surface_obj;

                gen9_add_adv_gpe_surface(ctx, gpe_context,
                                         input_surface,
                                         GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
            }
            break;
        }
    default:
        assert(0);

    }
}
4643
4644 static VAStatus
4645 gen9_avc_kernel_me(VADriverContextP ctx,
4646                    struct encode_state *encode_state,
4647                    struct intel_encoder_context *encoder_context,
4648                    int hme_type)
4649 {
4650     struct i965_driver_data *i965 = i965_driver_data(ctx);
4651     struct i965_gpe_table *gpe = &i965->gpe_table;
4652     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4653     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4654     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4655     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4656
4657     struct i965_gpe_context *gpe_context;
4658     struct gpe_media_object_walker_parameter media_object_walker_param;
4659     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4660     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4661     int media_function = 0;
4662     int kernel_idx = 0;
4663     struct me_param param ;
4664     unsigned int scale_factor = 0;
4665
4666     switch(hme_type)
4667     {
4668     case INTEL_ENC_HME_4x :
4669         {
4670             media_function = INTEL_MEDIA_STATE_4X_ME;
4671             scale_factor = 4;
4672             break;
4673         }
4674     case INTEL_ENC_HME_16x :
4675         {
4676             media_function = INTEL_MEDIA_STATE_16X_ME;
4677             scale_factor = 16;
4678             break;
4679         }
4680     case INTEL_ENC_HME_32x :
4681         {
4682             media_function = INTEL_MEDIA_STATE_32X_ME;
4683             scale_factor = 32;
4684             break;
4685         }
4686     default:
4687         assert(0);
4688
4689     }
4690
4691     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16;
4692     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16;
4693
4694     /* I frame should not come here.*/
4695     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P)? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4696     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4697
4698     gpe->context_init(ctx, gpe_context);
4699     gpe->reset_binding_table(ctx, gpe_context);
4700
4701     /*set curbe*/
4702     memset(&param,0,sizeof(param));
4703     param.hme_type = hme_type;
4704     generic_ctx->pfn_set_curbe_me(ctx,encode_state,gpe_context,encoder_context,&param);
4705
4706     /*send surface*/
4707     generic_ctx->pfn_send_me_surface(ctx,encode_state,gpe_context,encoder_context,&param);
4708
4709     gpe->setup_interface_data(ctx, gpe_context);
4710
4711     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4712     /* the scaling is based on 8x8 blk level */
4713     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4714     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4715     kernel_walker_param.no_dependency = 1;
4716
4717     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4718
4719     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4720                                         gpe_context,
4721                                         media_function,
4722                                         &media_object_walker_param);
4723
4724     return VA_STATUS_SUCCESS;
4725 }
4726
/*
weighted prediction (WP) related functions
*/
4730 static void
4731 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4732                      struct encode_state *encode_state,
4733                      struct i965_gpe_context *gpe_context,
4734                      struct intel_encoder_context *encoder_context,
4735                      void * param)
4736 {
4737     gen9_avc_wp_curbe_data *cmd;
4738     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4739     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4740     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4741     struct wp_param * curbe_param = (struct wp_param *)param;
4742
4743     cmd = i965_gpe_context_map_curbe(gpe_context);
4744
4745     if (!cmd)
4746         return;
4747     memset(cmd,0,sizeof(gen9_avc_wp_curbe_data));
4748     if(curbe_param->ref_list_idx)
4749     {
4750         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4751         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4752     }else
4753     {
4754         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4755         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4756     }
4757
4758     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4759     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4760
4761     i965_gpe_context_unmap_curbe(gpe_context);
4762
4763 }
4764
4765 static void
4766 gen9_avc_send_surface_wp(VADriverContextP ctx,
4767                          struct encode_state *encode_state,
4768                          struct i965_gpe_context *gpe_context,
4769                          struct intel_encoder_context *encoder_context,
4770                          void * param)
4771 {
4772     struct i965_driver_data *i965 = i965_driver_data(ctx);
4773     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4774     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4775     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4776     struct wp_param * curbe_param = (struct wp_param *)param;
4777     struct object_surface *obj_surface;
4778     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4779     VASurfaceID surface_id;
4780
4781     if(curbe_param->ref_list_idx)
4782     {
4783         surface_id = slice_param->RefPicList1[0].picture_id;
4784         obj_surface = SURFACE(surface_id);
4785         if (!obj_surface || !obj_surface->private_data)
4786             avc_state->weighted_ref_l1_enable = 0;
4787         else
4788             avc_state->weighted_ref_l1_enable = 1;
4789     }else
4790     {
4791         surface_id = slice_param->RefPicList0[0].picture_id;
4792         obj_surface = SURFACE(surface_id);
4793         if (!obj_surface || !obj_surface->private_data)
4794             avc_state->weighted_ref_l0_enable = 0;
4795         else
4796             avc_state->weighted_ref_l0_enable = 1;
4797     }
4798     if(!obj_surface)
4799         obj_surface = encode_state->reference_objects[0];
4800
4801
4802     gen9_add_adv_gpe_surface(ctx, gpe_context,
4803                              obj_surface,
4804                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
4805
4806     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4807     gen9_add_adv_gpe_surface(ctx, gpe_context,
4808                              obj_surface,
4809                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
4810 }
4811
4812
4813 static VAStatus
4814 gen9_avc_kernel_wp(VADriverContextP ctx,
4815                    struct encode_state *encode_state,
4816                    struct intel_encoder_context *encoder_context,
4817                    unsigned int list1_in_use)
4818 {
4819     struct i965_driver_data *i965 = i965_driver_data(ctx);
4820     struct i965_gpe_table *gpe = &i965->gpe_table;
4821     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4823     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4824     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4825
4826     struct i965_gpe_context *gpe_context;
4827     struct gpe_media_object_walker_parameter media_object_walker_param;
4828     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4829     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4830     struct wp_param param;
4831
4832     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4833
4834     gpe->context_init(ctx, gpe_context);
4835     gpe->reset_binding_table(ctx, gpe_context);
4836
4837     memset(&param,0,sizeof(param));
4838     param.ref_list_idx = (list1_in_use == 1)? 1: 0;
4839     /*set curbe*/
4840     generic_ctx->pfn_set_curbe_wp(ctx,encode_state,gpe_context,encoder_context,&param);
4841
4842     /*send surface*/
4843     generic_ctx->pfn_send_wp_surface(ctx,encode_state,gpe_context,encoder_context,&param);
4844
4845     gpe->setup_interface_data(ctx, gpe_context);
4846
4847     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4848     /* the scaling is based on 8x8 blk level */
4849     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4850     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4851     kernel_walker_param.no_dependency = 1;
4852
4853     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4854
4855     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4856                                         gpe_context,
4857                                         media_function,
4858                                         &media_object_walker_param);
4859
4860     return VA_STATUS_SUCCESS;
4861 }
4862
4863
/*
static frame detection (SFD) related functions
*/
4867 static void
4868 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
4869                      struct encode_state *encode_state,
4870                      struct i965_gpe_context *gpe_context,
4871                      struct intel_encoder_context *encoder_context,
4872                      void * param)
4873 {
4874     gen9_avc_sfd_curbe_data *cmd;
4875     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4876     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4877     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4878     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4879
4880     cmd = i965_gpe_context_map_curbe(gpe_context);
4881
4882     if (!cmd)
4883         return;
4884     memset(cmd,0,sizeof(gen9_avc_sfd_curbe_data));
4885
4886     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
4887     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
4888     cmd->dw0.stream_in_type = 7 ;
4889     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
4890     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
4891     cmd->dw0.vdenc_mode_disable = 1 ;
4892
4893     cmd->dw1.hme_stream_in_ref_cost = 5 ;
4894     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
4895     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
4896
4897     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
4898     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
4899
4900     cmd->dw3.large_mv_threshold = 128 ;
4901     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs)/100 ;
4902     cmd->dw5.zmv_threshold = 4 ;
4903     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold)/100 ; // zero_mv_threshold = 60;
4904     cmd->dw7.min_dist_threshold = 10 ;
4905
4906     if(generic_state->frame_type == SLICE_TYPE_P)
4907     {
4908         memcpy(cmd->cost_table,gen9_avc_sfd_cost_table_p_frame,AVC_QP_MAX* sizeof(unsigned char));
4909
4910     }else if(generic_state->frame_type == SLICE_TYPE_B)
4911     {
4912         memcpy(cmd->cost_table,gen9_avc_sfd_cost_table_b_frame,AVC_QP_MAX* sizeof(unsigned char));
4913     }
4914
4915     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
4916     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
4917     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
4918     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
4919     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
4920     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
4921     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
4922
4923     i965_gpe_context_unmap_curbe(gpe_context);
4924
4925 }
4926
4927 static void
4928 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4929                           struct encode_state *encode_state,
4930                           struct i965_gpe_context *gpe_context,
4931                           struct intel_encoder_context *encoder_context,
4932                           void * param)
4933 {
4934     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4935     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4936     struct i965_gpe_resource *gpe_resource;
4937     int size = 0;
4938
4939     /*HME mv data surface memv output 4x*/
4940     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4941     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4942                                    gpe_resource,
4943                                    1,
4944                                    I965_SURFACEFORMAT_R8_UNORM,
4945                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4946
4947     /* memv distortion */
4948     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4949     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4950                                    gpe_resource,
4951                                    1,
4952                                    I965_SURFACEFORMAT_R8_UNORM,
4953                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4954     /*buffer output*/
4955     size = 32 * 4 *4;
4956     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4957     gen9_add_buffer_gpe_surface(ctx,
4958                                 gpe_context,
4959                                 gpe_resource,
4960                                 0,
4961                                 size / 4,
4962                                 0,
4963                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4964
4965 }
4966
4967 static VAStatus
4968 gen9_avc_kernel_sfd(VADriverContextP ctx,
4969                     struct encode_state *encode_state,
4970                     struct intel_encoder_context *encoder_context)
4971 {
4972     struct i965_driver_data *i965 = i965_driver_data(ctx);
4973     struct i965_gpe_table *gpe = &i965->gpe_table;
4974     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4975     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4976     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4977
4978     struct i965_gpe_context *gpe_context;
4979     struct gpe_media_object_parameter media_object_param;
4980     struct gpe_media_object_inline_data media_object_inline_data;
4981     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4982     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4983
4984     gpe->context_init(ctx, gpe_context);
4985     gpe->reset_binding_table(ctx, gpe_context);
4986
4987     /*set curbe*/
4988     generic_ctx->pfn_set_curbe_sfd(ctx,encode_state,gpe_context,encoder_context,NULL);
4989
4990     /*send surface*/
4991     generic_ctx->pfn_send_sfd_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
4992
4993     gpe->setup_interface_data(ctx, gpe_context);
4994
4995     memset(&media_object_param, 0, sizeof(media_object_param));
4996     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4997     media_object_param.pinline_data = &media_object_inline_data;
4998     media_object_param.inline_size = sizeof(media_object_inline_data);
4999
5000     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5001                                      gpe_context,
5002                                      media_function,
5003                                      &media_object_param);
5004
5005     return VA_STATUS_SUCCESS;
5006 }
5007
5008 /*
5009 kernel related function:init/destroy etc
5010 */
5011 static void
5012 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
5013                              struct generic_encoder_context *generic_context,
5014                              struct gen_avc_scaling_context *kernel_context)
5015 {
5016     struct i965_driver_data *i965 = i965_driver_data(ctx);
5017     struct i965_gpe_table *gpe = &i965->gpe_table;
5018     struct i965_gpe_context *gpe_context = NULL;
5019     struct encoder_kernel_parameter kernel_param ;
5020     struct encoder_scoreboard_parameter scoreboard_param;
5021     struct i965_kernel common_kernel;
5022
5023     if (IS_SKL(i965->intel.device_info)||
5024         IS_BXT(i965->intel.device_info))
5025     {
5026         kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
5027         kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
5028     }
5029     else if (IS_KBL(i965->intel.device_info)||
5030              IS_GLK(i965->intel.device_info))
5031     {
5032         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
5033         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
5034     }
5035
5036     /* 4x scaling kernel*/
5037     kernel_param.sampler_size = 0;
5038
5039     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5040     scoreboard_param.mask = 0xFF;
5041     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5042     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5043     scoreboard_param.walkpat_flag = 0;
5044
5045     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
5046     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5047     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5048
5049     memset(&common_kernel, 0, sizeof(common_kernel));
5050
5051     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5052                                          generic_context->enc_kernel_size,
5053                                          INTEL_GENERIC_ENC_SCALING4X,
5054                                          0,
5055                                          &common_kernel);
5056
5057     gpe->load_kernels(ctx,
5058                       gpe_context,
5059                       &common_kernel,
5060                       1);
5061
5062     /*2x scaling kernel*/
5063     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
5064     kernel_param.inline_data_size = 0;
5065     kernel_param.sampler_size = 0;
5066
5067     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
5068     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5069     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5070
5071     memset(&common_kernel, 0, sizeof(common_kernel));
5072
5073     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5074                                          generic_context->enc_kernel_size,
5075                                          INTEL_GENERIC_ENC_SCALING2X,
5076                                          0,
5077                                          &common_kernel);
5078
5079     gpe->load_kernels(ctx,
5080                       gpe_context,
5081                       &common_kernel,
5082                       1);
5083
5084 }
5085
5086 static void
5087 gen9_avc_kernel_init_me(VADriverContextP ctx,
5088                         struct generic_encoder_context *generic_context,
5089                         struct gen_avc_me_context *kernel_context)
5090 {
5091     struct i965_driver_data *i965 = i965_driver_data(ctx);
5092     struct i965_gpe_table *gpe = &i965->gpe_table;
5093     struct i965_gpe_context *gpe_context = NULL;
5094     struct encoder_kernel_parameter kernel_param ;
5095     struct encoder_scoreboard_parameter scoreboard_param;
5096     struct i965_kernel common_kernel;
5097     int i = 0;
5098
5099     kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
5100     kernel_param.inline_data_size = 0;
5101     kernel_param.sampler_size = 0;
5102
5103     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5104     scoreboard_param.mask = 0xFF;
5105     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5106     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5107     scoreboard_param.walkpat_flag = 0;
5108
5109     for (i = 0; i < 2; i++) {
5110         gpe_context = &kernel_context->gpe_contexts[i];
5111         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5112         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5113
5114         memset(&common_kernel, 0, sizeof(common_kernel));
5115
5116         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5117                                              generic_context->enc_kernel_size,
5118                                              INTEL_GENERIC_ENC_ME,
5119                                              i,
5120                                              &common_kernel);
5121
5122         gpe->load_kernels(ctx,
5123                               gpe_context,
5124                               &common_kernel,
5125                               1);
5126     }
5127
5128 }
5129
5130 static void
5131 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
5132                            struct generic_encoder_context *generic_context,
5133                            struct gen_avc_mbenc_context *kernel_context)
5134 {
5135     struct i965_driver_data *i965 = i965_driver_data(ctx);
5136     struct i965_gpe_table *gpe = &i965->gpe_table;
5137     struct i965_gpe_context *gpe_context = NULL;
5138     struct encoder_kernel_parameter kernel_param ;
5139     struct encoder_scoreboard_parameter scoreboard_param;
5140     struct i965_kernel common_kernel;
5141     int i = 0;
5142     unsigned int curbe_size = 0;
5143
5144     if (IS_SKL(i965->intel.device_info)||
5145         IS_BXT(i965->intel.device_info)) {
5146         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
5147     }
5148     else if (IS_KBL(i965->intel.device_info) ||
5149              IS_GLK(i965->intel.device_info)) {
5150         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
5151     }
5152
5153     assert(curbe_size > 0);
5154     kernel_param.curbe_size = curbe_size;
5155     kernel_param.inline_data_size = 0;
5156     kernel_param.sampler_size = 0;
5157
5158     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5159     scoreboard_param.mask = 0xFF;
5160     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5161     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5162     scoreboard_param.walkpat_flag = 0;
5163
5164     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
5165         gpe_context = &kernel_context->gpe_contexts[i];
5166         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5167         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5168
5169         memset(&common_kernel, 0, sizeof(common_kernel));
5170
5171         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5172                                              generic_context->enc_kernel_size,
5173                                              INTEL_GENERIC_ENC_MBENC,
5174                                              i,
5175                                              &common_kernel);
5176
5177         gpe->load_kernels(ctx,
5178                           gpe_context,
5179                           &common_kernel,
5180                           1);
5181     }
5182
5183 }
5184
5185 static void
5186 gen9_avc_kernel_init_brc(VADriverContextP ctx,
5187                          struct generic_encoder_context *generic_context,
5188                          struct gen_avc_brc_context *kernel_context)
5189 {
5190     struct i965_driver_data *i965 = i965_driver_data(ctx);
5191     struct i965_gpe_table *gpe = &i965->gpe_table;
5192     struct i965_gpe_context *gpe_context = NULL;
5193     struct encoder_kernel_parameter kernel_param ;
5194     struct encoder_scoreboard_parameter scoreboard_param;
5195     struct i965_kernel common_kernel;
5196     int i = 0;
5197
5198     static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
5199         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
5200         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
5201         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
5202         (sizeof(gen9_avc_mbenc_curbe_data)),
5203         0,
5204         (sizeof(gen9_avc_mb_brc_curbe_data))
5205     };
5206
5207     kernel_param.inline_data_size = 0;
5208     kernel_param.sampler_size = 0;
5209
5210     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5211     scoreboard_param.mask = 0xFF;
5212     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5213     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5214     scoreboard_param.walkpat_flag = 0;
5215
5216     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
5217         kernel_param.curbe_size = brc_curbe_size[i];
5218         gpe_context = &kernel_context->gpe_contexts[i];
5219         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5220         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5221
5222         memset(&common_kernel, 0, sizeof(common_kernel));
5223
5224         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5225                                              generic_context->enc_kernel_size,
5226                                              INTEL_GENERIC_ENC_BRC,
5227                                              i,
5228                                              &common_kernel);
5229
5230         gpe->load_kernels(ctx,
5231                           gpe_context,
5232                           &common_kernel,
5233                           1);
5234     }
5235
5236 }
5237
5238 static void
5239 gen9_avc_kernel_init_wp(VADriverContextP ctx,
5240                         struct generic_encoder_context *generic_context,
5241                         struct gen_avc_wp_context *kernel_context)
5242 {
5243     struct i965_driver_data *i965 = i965_driver_data(ctx);
5244     struct i965_gpe_table *gpe = &i965->gpe_table;
5245     struct i965_gpe_context *gpe_context = NULL;
5246     struct encoder_kernel_parameter kernel_param ;
5247     struct encoder_scoreboard_parameter scoreboard_param;
5248     struct i965_kernel common_kernel;
5249
5250     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
5251     kernel_param.inline_data_size = 0;
5252     kernel_param.sampler_size = 0;
5253
5254     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5255     scoreboard_param.mask = 0xFF;
5256     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5257     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5258     scoreboard_param.walkpat_flag = 0;
5259
5260     gpe_context = &kernel_context->gpe_contexts;
5261     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5262     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5263
5264     memset(&common_kernel, 0, sizeof(common_kernel));
5265
5266     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5267                                          generic_context->enc_kernel_size,
5268                                          INTEL_GENERIC_ENC_WP,
5269                                          0,
5270                                          &common_kernel);
5271
5272     gpe->load_kernels(ctx,
5273                           gpe_context,
5274                           &common_kernel,
5275                           1);
5276
5277 }
5278
5279 static void
5280 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5281                          struct generic_encoder_context *generic_context,
5282                          struct gen_avc_sfd_context *kernel_context)
5283 {
5284     struct i965_driver_data *i965 = i965_driver_data(ctx);
5285     struct i965_gpe_table *gpe = &i965->gpe_table;
5286     struct i965_gpe_context *gpe_context = NULL;
5287     struct encoder_kernel_parameter kernel_param ;
5288     struct encoder_scoreboard_parameter scoreboard_param;
5289     struct i965_kernel common_kernel;
5290
5291     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5292     kernel_param.inline_data_size = 0;
5293     kernel_param.sampler_size = 0;
5294
5295     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5296     scoreboard_param.mask = 0xFF;
5297     scoreboard_param.enable = generic_context->use_hw_scoreboard;
5298     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5299     scoreboard_param.walkpat_flag = 0;
5300
5301     gpe_context = &kernel_context->gpe_contexts;
5302     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5303     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5304
5305     memset(&common_kernel, 0, sizeof(common_kernel));
5306
5307     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5308                                          generic_context->enc_kernel_size,
5309                                          INTEL_GENERIC_ENC_SFD,
5310                                          0,
5311                                          &common_kernel);
5312
5313     gpe->load_kernels(ctx,
5314                           gpe_context,
5315                           &common_kernel,
5316                           1);
5317
5318 }
5319
5320 static void
5321 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5322 {
5323
5324     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
5325     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5326     struct i965_gpe_table *gpe = &i965->gpe_table;
5327
5328     int i = 0;
5329
5330     gen9_avc_free_resources(vme_context);
5331
5332     for(i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5333         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5334
5335     for(i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5336         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5337
5338     for(i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5339         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5340
5341     for(i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5342         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
5343
5344     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5345
5346     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
5347
5348 }
5349
5350 /*
5351 vme pipeline
5352 */
5353 static void
5354 gen9_avc_update_parameters(VADriverContextP ctx,
5355                              VAProfile profile,
5356                              struct encode_state *encode_state,
5357                              struct intel_encoder_context *encoder_context)
5358 {
5359     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5360     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
5361     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
5362     VAEncSequenceParameterBufferH264 *seq_param;
5363     VAEncSliceParameterBufferH264 * slice_param;
5364     int i,j;
5365     unsigned int preset = generic_state->preset;
5366
5367     /* seq/pic/slice parameter setting */
5368     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5369     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5370
5371     avc_state->seq_param =  (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5372     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
5373
5374
5375     avc_state->enable_avc_ildb = 0;
5376     avc_state->slice_num = 0;
5377     for (j = 0; j < encode_state->num_slice_params_ext && avc_state->enable_avc_ildb == 0; j++) {
5378         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
5379         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5380
5381         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5382             assert((slice_param->slice_type == SLICE_TYPE_I) ||
5383                    (slice_param->slice_type == SLICE_TYPE_SI) ||
5384                    (slice_param->slice_type == SLICE_TYPE_P) ||
5385                    (slice_param->slice_type == SLICE_TYPE_SP) ||
5386                    (slice_param->slice_type == SLICE_TYPE_B));
5387
5388             if (slice_param->disable_deblocking_filter_idc != 1) {
5389                 avc_state->enable_avc_ildb = 1;
5390             }
5391
5392             avc_state->slice_param[i] = slice_param;
5393             slice_param++;
5394             avc_state->slice_num++;
5395         }
5396     }
5397
5398     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5399     seq_param = avc_state->seq_param;
5400     slice_param = avc_state->slice_param[0];
5401
5402     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5403
5404     if (slice_param->slice_type == SLICE_TYPE_I ||
5405         slice_param->slice_type == SLICE_TYPE_SI)
5406         generic_state->frame_type = SLICE_TYPE_I;
5407     else if(slice_param->slice_type == SLICE_TYPE_P)
5408         generic_state->frame_type = SLICE_TYPE_P;
5409     else if(slice_param->slice_type == SLICE_TYPE_B)
5410         generic_state->frame_type = SLICE_TYPE_B;
5411     if (profile == VAProfileH264High)
5412         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
5413     else
5414         avc_state->transform_8x8_mode_enable = 0;
5415
5416     /* rc init*/
5417     if(generic_state->brc_enabled &&(!generic_state->brc_inited || generic_state->brc_need_reset ))
5418     {
5419         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5420         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5421         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5422         generic_state->frames_per_100s = 3000; /* 30fps */
5423     }
5424
5425     generic_state->gop_size = seq_param->intra_period;
5426     generic_state->gop_ref_distance = seq_param->ip_period;
5427
5428     if (generic_state->internal_rate_mode == VA_RC_CBR) {
5429         generic_state->max_bit_rate = generic_state->target_bit_rate;
5430         generic_state->min_bit_rate = generic_state->target_bit_rate;
5431     }
5432
5433     if(generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame)
5434     {
5435         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5436     }
5437
5438     generic_state->preset = encoder_context->quality_level;
5439     if(encoder_context->quality_level == INTEL_PRESET_UNKNOWN)
5440     {
5441         generic_state->preset = INTEL_PRESET_RT_SPEED;
5442     }
5443     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5444
5445     if(!generic_state->brc_inited)
5446     {
5447         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5448         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5449         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5450         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5451     }
5452
5453
5454     generic_state->curr_pak_pass = 0;
5455     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5456
5457     if (generic_state->internal_rate_mode == VA_RC_CBR ||
5458         generic_state->internal_rate_mode == VA_RC_VBR)
5459         generic_state->brc_enabled = 1;
5460     else
5461         generic_state->brc_enabled = 0;
5462
5463     if (generic_state->brc_enabled &&
5464         (!generic_state->init_vbv_buffer_fullness_in_bit ||
5465          !generic_state->vbv_buffer_size_in_bit ||
5466          !generic_state->max_bit_rate ||
5467          !generic_state->target_bit_rate ||
5468          !generic_state->frames_per_100s))
5469     {
5470         WARN_ONCE("Rate control parameter is required for BRC\n");
5471         generic_state->brc_enabled = 0;
5472     }
5473
5474     if (!generic_state->brc_enabled) {
5475         generic_state->target_bit_rate = 0;
5476         generic_state->max_bit_rate = 0;
5477         generic_state->min_bit_rate = 0;
5478         generic_state->init_vbv_buffer_fullness_in_bit = 0;
5479         generic_state->vbv_buffer_size_in_bit = 0;
5480         generic_state->num_pak_passes = 1;
5481     } else {
5482         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5483     }
5484
5485
5486     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5487     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5488     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5489     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5490
5491     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel/4,16);
5492     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel/4,16);
5493     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x/16 ;
5494     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x/16;
5495
5496     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel/16,16);
5497     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel/16,16);
5498     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x/16 ;
5499     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x/16;
5500
5501     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel/32,16);
5502     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel/32,16);
5503     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x/16 ;
5504     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x/16;
5505
5506     if (generic_state->hme_supported) {
5507         generic_state->hme_enabled = 1;
5508     } else {
5509         generic_state->hme_enabled = 0;
5510     }
5511
5512     if (generic_state->b16xme_supported) {
5513         generic_state->b16xme_enabled = 1;
5514     } else {
5515         generic_state->b16xme_enabled = 0;
5516     }
5517
5518     if (generic_state->b32xme_supported) {
5519         generic_state->b32xme_enabled = 1;
5520     } else {
5521         generic_state->b32xme_enabled = 0;
5522     }
5523     /* disable HME/16xME if the size is too small */
5524     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5525         generic_state->b32xme_supported = 0;
5526         generic_state->b32xme_enabled = 0;
5527         generic_state->b16xme_supported = 0;
5528         generic_state->b16xme_enabled = 0;
5529         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5530         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5531     }
5532     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5533         generic_state->b32xme_supported = 0;
5534         generic_state->b32xme_enabled = 0;
5535         generic_state->b16xme_supported = 0;
5536         generic_state->b16xme_enabled = 0;
5537         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5538         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5539     }
5540
5541     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT)
5542     {
5543         generic_state->b32xme_supported = 0;
5544         generic_state->b32xme_enabled = 0;
5545         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5546         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5547     }
5548     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5549         generic_state->b32xme_supported = 0;
5550         generic_state->b32xme_enabled = 0;
5551         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5552         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5553     }
5554
5555     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT)
5556     {
5557         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5558         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5559     }
5560     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5561         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5562         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5563     }
5564
5565 }
5566
5567 static VAStatus
5568 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5569                                 struct encode_state *encode_state,
5570                                 struct intel_encoder_context *encoder_context)
5571 {
5572     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5573     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
5574     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
5575     unsigned int rate_control_mode = encoder_context->rate_control_mode;
5576     unsigned int preset = generic_state->preset;
5577     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5578     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5579     int i = 0;
5580     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5581     /*avbr init*/
5582     generic_state->avbr_curracy = 30;
5583     generic_state->avbr_convergence = 150;
5584
5585     switch (rate_control_mode & 0x7f) {
5586     case VA_RC_CBR:
5587         generic_state->internal_rate_mode = VA_RC_CBR;
5588         break;
5589
5590     case VA_RC_VBR:
5591         generic_state->internal_rate_mode = VA_RC_VBR;
5592         break;
5593
5594     case VA_RC_CQP:
5595     default:
5596         generic_state->internal_rate_mode = VA_RC_CQP;
5597         break;
5598     }
5599
5600     if (rate_control_mode != VA_RC_NONE &&
5601         rate_control_mode != VA_RC_CQP) {
5602         generic_state->brc_enabled = 1;
5603         generic_state->brc_distortion_buffer_supported = 1;
5604         generic_state->brc_constant_buffer_supported = 1;
5605         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5606     }
5607
5608     /*check brc parameter*/
5609     if(generic_state->brc_enabled)
5610     {
5611        avc_state->mb_qp_data_enable = 0;
5612     }
5613
5614     /*set the brc init and reset accordingly*/
5615     if(generic_state->brc_need_reset &&
5616         (generic_state->brc_distortion_buffer_supported == 0 ||
5617         rate_control_mode == VA_RC_CQP))
5618     {
5619        generic_state->brc_need_reset = 0;// not support by CQP
5620     }
5621
5622     if(generic_state->brc_need_reset && !avc_state->sfd_mb_enable)
5623     {
5624         avc_state->sfd_enable = 0;
5625     }
5626
5627     if(generic_state->frames_per_window_size == 0)
5628     {
5629         generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
5630     }else if(generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s/100)
5631     {
5632         generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
5633     }
5634
5635     if(generic_state->brc_enabled)
5636     {
5637         generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5638         if(avc_state->min_max_qp_enable)
5639         {
5640             generic_state->num_pak_passes = 1;
5641         }
5642         generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5643         generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5644     }else
5645     {
5646         generic_state->num_pak_passes = 1;// CQP only one pass
5647     }
5648
5649     avc_state->mbenc_i_frame_dist_in_use = 0;
5650     avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5651
5652     /*ROI must enable mbbrc.*/
5653
5654     /*CAD check*/
5655     if(avc_state->caf_supported)
5656     {
5657         switch(generic_state->frame_type)
5658         {
5659         case SLICE_TYPE_I:
5660             break;
5661         case SLICE_TYPE_P:
5662             avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5663             break;
5664         case SLICE_TYPE_B:
5665             avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5666             break;
5667         }
5668
5669         if(avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset])
5670         {
5671             if(generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5672                  avc_state->caf_enable = 0;
5673         }
5674     }
5675
5676     avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset&0x7];
5677
5678     /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5679     if(avc_state->flatness_check_supported )
5680     {
5681         avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5682     }else
5683     {
5684         avc_state->flatness_check_enable = 0;
5685     }
5686
5687     /* check mb_status_supported/enbale*/
5688     if(avc_state->adaptive_transform_decision_enable)
5689     {
5690        avc_state->mb_status_enable = 1;
5691     }else
5692     {
5693        avc_state->mb_status_enable = 0;
5694     }
5695     /*slice check,all the slices use the same slice height except the last slice*/
5696     avc_state->arbitrary_num_mbs_in_slice = 0;
5697     for(i = 0; i < avc_state->slice_num;i++)
5698     {
5699         assert(avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs == 0);
5700         avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
5701         /*add it later for muli slices map*/
5702     }
5703
5704     if(generic_state->frame_type == SLICE_TYPE_I)
5705     {
5706        generic_state->hme_enabled = 0;
5707        generic_state->b16xme_enabled = 0;
5708        generic_state->b32xme_enabled = 0;
5709     }
5710
5711     if(generic_state->frame_type == SLICE_TYPE_B)
5712     {
5713         gen9_avc_get_dist_scale_factor(ctx,encode_state,encoder_context);
5714         avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0],pic_param->pic_fields.bits.weighted_bipred_idc);
5715     }
5716
5717     /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5718     avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5719         && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5720
5721     if(generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY)
5722     {
5723         avc_state->tq_enable = 1;
5724         avc_state->tq_rounding = 6;
5725         if(generic_state->brc_enabled)
5726         {
5727             generic_state->mb_brc_enabled = 1;
5728         }
5729     }
5730
5731     //check the inter rounding
5732     avc_state->rounding_value = 0;
5733     avc_state->rounding_inter_p = 255;//default
5734     avc_state->rounding_inter_b = 255; //default
5735     avc_state->rounding_inter_b_ref = 255; //default
5736
5737     if(generic_state->frame_type == SLICE_TYPE_P)
5738     {
5739         if(avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
5740         {
5741             if(avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5742             {
5743                 if(generic_state->gop_ref_distance == 1)
5744                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5745                 else
5746                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5747             }
5748             else
5749             {
5750                 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5751             }
5752
5753         }else
5754         {
5755             avc_state->rounding_value = avc_state->rounding_inter_p;
5756         }
5757     }else if(generic_state->frame_type == SLICE_TYPE_B)
5758     {
5759         if(pic_param->pic_fields.bits.reference_pic_flag)
5760         {
5761             if(avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5762                 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5763             else
5764                 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5765         }
5766         else
5767         {
5768             if(avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
5769             {
5770                 if(avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5771                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5772                 else
5773                     avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5774             }else
5775             {
5776                 avc_state->rounding_value = avc_state->rounding_inter_b;
5777             }
5778         }
5779     }
5780     return VA_STATUS_SUCCESS;
5781 }
5782
/* Per-frame preparation for the VME (ENC) kernels.
 *
 * Binds the reconstructed, input and reference surfaces to GPE resources,
 * wires the direct-MV (DMV) buffers for the current and reference frames,
 * attaches the coded buffer (bitstream + status segment), and resolves the
 * L0/L1 reference picture list entries to indices into
 * encode_state->reference_objects.
 *
 * Returns VA_STATUS_SUCCESS, or the first failure from surface
 * allocation/validation, or VA_STATUS_ERROR_INVALID_VALUE when the active
 * reference count exceeds the driver's list capacity.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i,j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Make sure the per-surface private data (DMV buffers etc.) exists and
     * matches the current frame dimensions. */
    memset(&surface_param,0,sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
    /* init the member of avc_priv_surface,frame_store_id,qp_value*/
       /* The last two DMV slots are reserved for the current frame's
        * top/bottom-field motion vector buffers; rebind them here. */
       avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = 0;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = 0;
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2]);
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
       /* Cache per-frame attributes on the private surface: QP from
        * pic_init_qp + first slice's delta, POC from the picture param. */
       avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
       avc_priv_surface->frame_store_id = 0;
       avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
       avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
       avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    /* Rebind the reconstructed surface GPE resource for this frame. */
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each valid reference and its pair of DMV
     * buffers (slots 2*i / 2*i+1); stop at the first empty entry. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2*i] = 0;
        avc_state->top_field_poc[2*i+1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                obj_surface,encoder_context,
                &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        }else
        {
            break;
        }
    }

    /* Output coded buffer: bitstream starts after the driver header; the
     * end offset leaves the final 4KB page outside the bitstream region. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer shares the coded buffer BO */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* Clear the status report area inside the same BO. */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata,0,avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Active reference counts come from the picture param unless the slice
     * overrides them (num_ref_idx_active_override_flag). */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry (a surface id) to its slot index in
     * encode_state->reference_objects; unused entries stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
5980
/* Per-frame kernel-init hook of the VME pipeline.
 * Intentionally a no-op on Gen9 AVC; kept so the pipeline stage sequence
 * (prepare -> init -> run -> final) stays uniform. Always succeeds.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    return VA_STATUS_SUCCESS;
}
5988
5989 static VAStatus
5990 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5991                               struct encode_state *encode_state,
5992                               struct intel_encoder_context *encoder_context)
5993 {
5994
5995     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5996     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
5997     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
5998
5999     /*set this flag when all kernel is finished*/
6000     if(generic_state->brc_enabled)
6001     {
6002         generic_state->brc_inited = 1;
6003         generic_state->brc_need_reset = 0;
6004         avc_state->mbenc_curbe_set_in_brc_update = 0;
6005     }
6006     return VA_STATUS_SUCCESS;
6007 }
6008
/* Submits the full VME kernel sequence for the current frame, in order:
 * BRC init/reset -> down-scaling -> HME (32x/16x/4x) -> SFD -> BRC frame/MB
 * update (with optional I-frame distortion MBEnc pass) -> weighted
 * prediction -> MBEnc. The ordering is load-bearing (see inline comments).
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if(generic_state->brc_enabled &&(!generic_state->brc_inited || generic_state->brc_need_reset ))
    {
        gen9_avc_kernel_brc_init_reset(ctx,encode_state,encoder_context);
    }

    /*down scaling: each coarser level only exists if the finer one does*/
    if(generic_state->hme_supported)
    {
        gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_4x);
        if(generic_state->b16xme_supported)
        {
            gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_16x);
            if(generic_state->b32xme_supported)
            {
                gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel: run coarsest-to-finest so each level can seed the next*/
    if(generic_state->hme_enabled)
    {
        if(generic_state->b16xme_enabled)
        {
            if(generic_state->b32xme_enabled)
            {
                gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_4x);
    }

    /*call SFD (static frame detection) kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if(sfd_in_use)
    {
        gen9_avc_kernel_sfd(ctx,encode_state,encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if(generic_state->brc_enabled)
    {
        /* Optional extra MBEnc pass in I-frame-distortion mode feeds the
         * BRC frame update. */
        if(avc_state->mbenc_i_frame_dist_in_use)
        {
            gen9_avc_kernel_mbenc(ctx,encode_state,encoder_context,true);
        }
        gen9_avc_kernel_brc_frame_update(ctx,encode_state,encoder_context);

        if(avc_state->brc_split_enable && generic_state->mb_brc_enabled)
        {
            gen9_avc_kernel_brc_mb_update(ctx,encode_state,encoder_context);
        }
    }

    /*weight prediction,disable by now */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if(avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
        (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT)))
    {
        /* NOTE(review): on inconsistent app-provided weight flags the code
         * silently clears the pic_param WP bits rather than failing. */
        if(slice_param->luma_weight_l0_flag & 1)
        {
            gen9_avc_kernel_wp(ctx,encode_state,encoder_context,0);

        }else if(!(slice_param->chroma_weight_l0_flag & 1))
        {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if(generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT)
        {
            if(slice_param->luma_weight_l1_flag & 1)
            {
                gen9_avc_kernel_wp(ctx,encode_state,encoder_context,1);
            }else if(!((slice_param->luma_weight_l0_flag & 1)||
                       (slice_param->chroma_weight_l0_flag & 1)||
                       (slice_param->chroma_weight_l1_flag & 1)))
            {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel: the main encode pass*/
    gen9_avc_kernel_mbenc(ctx,encode_state,encoder_context,false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
6116
6117 static VAStatus
6118 gen9_avc_vme_pipeline(VADriverContextP ctx,
6119                       VAProfile profile,
6120                       struct encode_state *encode_state,
6121                       struct intel_encoder_context *encoder_context)
6122 {
6123     VAStatus va_status;
6124
6125     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
6126
6127     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
6128     if (va_status != VA_STATUS_SUCCESS)
6129         return va_status;
6130
6131     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
6132     if (va_status != VA_STATUS_SUCCESS)
6133         return va_status;
6134
6135     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
6136     if (va_status != VA_STATUS_SUCCESS)
6137         return va_status;
6138
6139     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
6140     if (va_status != VA_STATUS_SUCCESS)
6141         return va_status;
6142
6143     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
6144     if (va_status != VA_STATUS_SUCCESS)
6145         return va_status;
6146
6147     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
6148
6149     return VA_STATUS_SUCCESS;
6150 }
6151
6152 static void
6153 gen9_avc_vme_context_destroy(void * context)
6154 {
6155     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
6156     struct generic_encoder_context *generic_ctx;
6157     struct i965_avc_encoder_context *avc_ctx;
6158     struct generic_enc_codec_state *generic_state;
6159     struct avc_enc_state *avc_state;
6160
6161     if (!vme_context)
6162         return;
6163
6164     generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
6165     avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
6166     generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
6167     avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
6168
6169     gen9_avc_kernel_destroy(vme_context);
6170
6171     free(generic_ctx);
6172     free(avc_ctx);
6173     free(generic_state);
6174     free(avc_state);
6175     free(vme_context);
6176     return;
6177
6178 }
6179
6180 static void
6181 gen9_avc_kernel_init(VADriverContextP ctx,
6182                      struct intel_encoder_context *encoder_context)
6183 {
6184     struct i965_driver_data *i965 = i965_driver_data(ctx);
6185     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6186     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
6187     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
6188
6189     gen9_avc_kernel_init_scaling(ctx,generic_ctx,&avc_ctx->context_scaling);
6190     gen9_avc_kernel_init_brc(ctx,generic_ctx,&avc_ctx->context_brc);
6191     gen9_avc_kernel_init_me(ctx,generic_ctx,&avc_ctx->context_me);
6192     gen9_avc_kernel_init_mbenc(ctx,generic_ctx,&avc_ctx->context_mbenc);
6193     gen9_avc_kernel_init_wp(ctx,generic_ctx,&avc_ctx->context_wp);
6194     gen9_avc_kernel_init_sfd(ctx,generic_ctx,&avc_ctx->context_sfd);
6195
6196     //function pointer
6197     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
6198     generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
6199     generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
6200     generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
6201     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
6202     generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
6203     generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
6204     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
6205     generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
6206
6207     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
6208     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
6209     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
6210     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
6211     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
6212     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
6213     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
6214     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
6215
6216     if (IS_SKL(i965->intel.device_info)||
6217         IS_BXT(i965->intel.device_info))
6218         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
6219     else if (IS_KBL(i965->intel.device_info)||
6220              IS_GLK(i965->intel.device_info))
6221         generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
6222
6223 }
6224
6225 /*
6226 PAK pipeline related function
6227 */
6228 extern int
6229 intel_avc_enc_slice_type_fixup(int slice_type);
6230
/* Emits MFX_PIPE_MODE_SELECT for the AVC PAK pass.
 * Configures the MFX engine for AVC encoding (long format, non-VDEnc) and
 * enables the PAK stream-out for every pass except the last, so later
 * passes can re-use the per-MB output.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes -1)) << 10) |                   /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
6271
/* Emits MFX_SURFACE_STATE describing one NV12 (planar 4:2:0, interleaved
 * U/V, Y-tiled) surface for the PAK. `id` is the MFX surface id slot the
 * state applies to.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    /* NOTE(review): DW5 (the second chroma offset) also uses y_cb_offset;
     * for interleaved-UV NV12 the Cr data shares the Cb plane offset —
     * confirm against the MFX_SURFACE_STATE spec. */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
}
6303
/* Emits the 65-DW MFX_PIPE_BUF_ADDR_STATE: all the frame-level buffer
 * addresses the PAK needs (deblocking outputs, source picture, MB status,
 * row-store scratch buffers, and the 16 reference picture slots).
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture (read-only source) */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) — the MB status buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 DWs per slot, 16 slots) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) — same MB status buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer (unused by the encoder) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer (unused by the encoder) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer (unused) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6361
/* Emits the 26-DW MFX_IND_OBJ_BASE_ADDR_STATE: the indirect MV data buffer
 * (from the reconstructed surface's private data) and the PAK-BSE output
 * region inside the compressed bitstream buffer. Silently returns if the
 * reconstructed surface has no private data yet.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset (not used here) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* MV data size: 32 DWORD-sized entries per macroblock. */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    /* Upper bound of the MV object, page-aligned. */
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size,0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
6426
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs).
 * Only the BSD/MPC row-store scratch buffer is relevant for encoding;
 * the MPR row-store and bitplane-read buffers are decode-only and left NULL.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6450
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs).
 * Programs the direct-MV (DMV) buffer addresses for the reference frames and
 * the current frame, followed by the POC list used for temporal direct mode.
 * DW layout: DW0 header, DW1-32 reference DMV addresses (16 x 64-bit relocs),
 * DW33 attributes/MOCS, DW34-35 current-frame DMV address, DW36 attributes,
 * DW37-70 the 34-entry POC list.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* NOTE(review): each 64-bit reloc consumes two DWs, so only the
     * even-indexed DMV buffers are emitted here — presumably odd indices
     * belong to bottom fields and are unused for frame encoding; confirm
     * against the DMV buffer allocation. */
    for(i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if ( avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: reference DMV buffer attributes (MOCS) */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by PAK) */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list (DW37-68 reference POCs, DW69-70 the current frame's entries) */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
6498
6499 static void
6500 gen9_mfc_qm_state(VADriverContextP ctx,
6501                   int qm_type,
6502                   const unsigned int *qm,
6503                   int qm_length,
6504                   struct intel_encoder_context *encoder_context)
6505 {
6506     struct intel_batchbuffer *batch = encoder_context->base.batch;
6507     unsigned int qm_buffer[16];
6508
6509     assert(qm_length <= 16);
6510     assert(sizeof(*qm) == 4);
6511     memset(qm_buffer,0,16*4);
6512     memcpy(qm_buffer, qm, qm_length * 4);
6513
6514     BEGIN_BCS_BATCH(batch, 18);
6515     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6516     OUT_BCS_BATCH(batch, qm_type << 0);
6517     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6518     ADVANCE_BCS_BATCH(batch);
6519 }
6520
6521 static void
6522 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6523                       struct encode_state *encode_state,
6524                       struct intel_encoder_context *encoder_context)
6525 {
6526     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6527     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
6528     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6529     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6530
6531
6532     const unsigned int *qm_4x4_intra;
6533     const unsigned int *qm_4x4_inter;
6534     const unsigned int *qm_8x8_intra;
6535     const unsigned int *qm_8x8_inter;
6536
6537     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6538         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6539         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6540     } else {
6541         VAIQMatrixBufferH264 *qm;
6542         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6543         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6544         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6545         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6546         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6547         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6548     }
6549
6550     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6551     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6552     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6553     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
6554 }
6555
6556 static void
6557 gen9_mfc_fqm_state(VADriverContextP ctx,
6558                    int fqm_type,
6559                    const unsigned int *fqm,
6560                    int fqm_length,
6561                    struct intel_encoder_context *encoder_context)
6562 {
6563     struct intel_batchbuffer *batch = encoder_context->base.batch;
6564     unsigned int fqm_buffer[32];
6565
6566     assert(fqm_length <= 32);
6567     assert(sizeof(*fqm) == 4);
6568     memset(fqm_buffer,0,32*4);
6569     memcpy(fqm_buffer, fqm, fqm_length * 4);
6570
6571     BEGIN_BCS_BATCH(batch, 34);
6572     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6573     OUT_BCS_BATCH(batch, fqm_type << 0);
6574     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6575     ADVANCE_BCS_BATCH(batch);
6576 }
6577
/*
 * Build a forward quantizer matrix from a scaling list: transpose the
 * len x len input and store 2^16 / qm for each entry.
 * NOTE: a scaling-list value of 1 yields 65536, which wraps to 0 when
 * stored in a uint16_t — this matches the original behavior.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q); /* division by zero would be UB */
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
6589
6590 static void
6591 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6592                       struct encode_state *encode_state,
6593                       struct intel_encoder_context *encoder_context)
6594 {
6595     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6596     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
6597     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6598     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6599
6600     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6601         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6602         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6603         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6604         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6605         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6606     } else {
6607         int i;
6608         uint32_t fqm[32];
6609         VAIQMatrixBufferH264 *qm;
6610         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6611         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6612
6613         for (i = 0; i < 3; i++)
6614             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6615         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6616
6617         for (i = 3; i < 6; i++)
6618             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6619         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6620
6621         gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6622         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6623
6624         gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6625         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6626     }
6627 }
6628
6629 static void
6630 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6631                            struct intel_encoder_context *encoder_context,
6632                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6633                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6634                            int slice_header_indicator,
6635                            struct intel_batchbuffer *batch)
6636 {
6637     if (data_bits_in_last_dw == 0)
6638         data_bits_in_last_dw = 32;
6639
6640     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6641
6642     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6643     OUT_BCS_BATCH(batch,
6644                   (0 << 16) |   /* always start at offset 0 */
6645                   (slice_header_indicator << 14) |
6646                   (data_bits_in_last_dw << 8) |
6647                   (skip_emul_byte_count << 4) |
6648                   (!!emulation_flag << 3) |
6649                   ((!!is_last_header) << 2) |
6650                   ((!!is_end_of_slice) << 1) |
6651                   (0 << 0));    /* check this flag */
6652     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6653
6654     ADVANCE_BCS_BATCH(batch);
6655 }
6656
/*
 * Insert the packed data associated with one slice into the bitstream.
 * First all application-supplied raw packed buffers for the slice are
 * emitted (skipping any VAEncPackedHeaderSlice entries), then the slice
 * header itself — either the application-supplied packed slice header or,
 * if none was given, one built by the driver. The slice header is always
 * emitted last, with the last-header and slice-header-indicator flags set.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;

    /* 0 means "no packed slice header"; otherwise the low bits index into
     * packed_header_data_ext[]. */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* Pass 1: emit the slice's raw packed buffers. */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    /* Pass 2: emit the slice header itself. */
    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        /* build_avc_slice_header() allocates slice_header; freed below. */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
6761
6762 static void
6763 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6764                            struct encode_state *encode_state,
6765                            struct intel_encoder_context *encoder_context,
6766                            VAEncSliceParameterBufferH264 *slice_param,
6767                            int slice_index,
6768                            struct intel_batchbuffer *batch)
6769 {
6770     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6771     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
6772     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6773     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6774     unsigned int skip_emul_byte_cnt;
6775
6776     if (slice_index == 0) {
6777         if (encode_state->packed_header_data[idx]) {
6778             VAEncPackedHeaderParameterBuffer *param = NULL;
6779             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6780             unsigned int length_in_bits;
6781
6782             assert(encode_state->packed_header_param[idx]);
6783             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6784             length_in_bits = param->bit_length;
6785
6786             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6787             gen9_mfc_avc_insert_object(ctx,
6788                                        encoder_context,
6789                                        header_data,
6790                                        ALIGN(length_in_bits, 32) >> 5,
6791                                        length_in_bits & 0x1f,
6792                                        skip_emul_byte_cnt,
6793                                        0,
6794                                        0,
6795                                        !param->has_emulation_bytes,
6796                                        0,
6797                                        batch);
6798         }
6799
6800         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6801
6802         if (encode_state->packed_header_data[idx]) {
6803             VAEncPackedHeaderParameterBuffer *param = NULL;
6804             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6805             unsigned int length_in_bits;
6806
6807             assert(encode_state->packed_header_param[idx]);
6808             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6809             length_in_bits = param->bit_length;
6810
6811             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6812
6813             gen9_mfc_avc_insert_object(ctx,
6814                                        encoder_context,
6815                                        header_data,
6816                                        ALIGN(length_in_bits, 32) >> 5,
6817                                        length_in_bits & 0x1f,
6818                                        skip_emul_byte_cnt,
6819                                        0,
6820                                        0,
6821                                        !param->has_emulation_bytes,
6822                                        0,
6823                                        batch);
6824         }
6825
6826         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6827
6828         if (encode_state->packed_header_data[idx]) {
6829             VAEncPackedHeaderParameterBuffer *param = NULL;
6830             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6831             unsigned int length_in_bits;
6832
6833             assert(encode_state->packed_header_param[idx]);
6834             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6835             length_in_bits = param->bit_length;
6836
6837             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6838             gen9_mfc_avc_insert_object(ctx,
6839                                        encoder_context,
6840                                        header_data,
6841                                        ALIGN(length_in_bits, 32) >> 5,
6842                                        length_in_bits & 0x1f,
6843                                        skip_emul_byte_cnt,
6844                                        0,
6845                                        0,
6846                                        !param->has_emulation_bytes,
6847                                        0,
6848                                        batch);
6849         } else if (internal_rate_mode == VA_RC_CBR) {
6850             /* insert others */
6851         }
6852     }
6853
6854     gen9_mfc_avc_insert_slice_packed_data(ctx,
6855                                           encode_state,
6856                                           encoder_context,
6857                                           slice_index,
6858                                           batch);
6859 }
6860
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice.
 * Derives the per-slice PAK parameters — reference list sizes, weighted
 * prediction mode, log2 weight denominators, slice QP, slice extents and
 * the BRC/rounding controls — from the picture/slice parameter buffers
 * and the encoder state, then programs them into the command.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* MB address -> (x, y) position of the first MB of this slice. */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: it ends at the bottom of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        /* the slice-level override takes precedence over the PPS counts */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* BRC grow/shrink/correction controls are left zeroed here; the BRC
     * kernel patches the slice state in multi-pass runs. */
    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* panic mode only on the final PAK pass, and never for CQP */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                      (!avc_state->min_max_qp_enable) &&
                      (encoder_context->rate_control_mode != VA_RC_CQP) &&
                      (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B)?slice_param->direct_spatial_mv_pred_flag:0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: first MB of this slice, first MB of the next slice */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: slice start offset within the indirect PAK-BSE object */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /* QP max delta (negative direction); 0 here, BRC may patch */
                  (max_qp_p << 16) |     /* QP max delta (positive direction); 0 here, BRC may patch */
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |           /* NOTE(review): rounding-intra enable with value 5 below — confirm against PRM */
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
7009
7010 static uint8_t
7011 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
7012 {
7013     unsigned int is_long_term =
7014         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
7015     unsigned int is_top_field =
7016         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
7017     unsigned int is_bottom_field =
7018         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
7019
7020     return ((is_long_term                         << 6) |
7021             (0 << 5) |
7022             (frame_store_id                       << 1) |
7023             ((is_top_field ^ 1) & is_bottom_field));
7024 }
7025
7026 static void
7027 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
7028                                  struct encode_state *encode_state,
7029                                  struct intel_encoder_context *encoder_context,
7030                                  VAEncSliceParameterBufferH264 *slice_param,
7031                                  struct intel_batchbuffer *batch)
7032 {
7033     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7034     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
7035     VAPictureH264 *ref_pic;
7036     int i, slice_type, ref_idx_shift;
7037     unsigned int fwd_ref_entry;
7038     unsigned int bwd_ref_entry;
7039
7040     /* max 4 ref frames are allowed for l0 and l1 */
7041     fwd_ref_entry = 0x80808080;
7042     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
7043
7044     if ((slice_type == SLICE_TYPE_P) ||
7045         (slice_type == SLICE_TYPE_B)) {
7046           for (i = 0; i < MIN(avc_state->num_refs[0],4); i++) {
7047               ref_pic = &slice_param->RefPicList0[i];
7048               ref_idx_shift = i * 8;
7049
7050               fwd_ref_entry &= ~(0xFF << ref_idx_shift);
7051               fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
7052           }
7053     }
7054
7055     bwd_ref_entry = 0x80808080;
7056     if (slice_type == SLICE_TYPE_B) {
7057         for (i = 0; i < MIN(avc_state->num_refs[1],4); i++) {
7058             ref_pic = &slice_param->RefPicList1[i];
7059             ref_idx_shift = i * 8;
7060
7061             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
7062             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
7063         }
7064     }
7065
7066     if ((slice_type == SLICE_TYPE_P) ||
7067         (slice_type == SLICE_TYPE_B)) {
7068         BEGIN_BCS_BATCH(batch, 10);
7069         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
7070         OUT_BCS_BATCH(batch, 0);                        // L0
7071         OUT_BCS_BATCH(batch, fwd_ref_entry);
7072
7073         for (i = 0; i < 7; i++) {
7074             OUT_BCS_BATCH(batch, 0x80808080);
7075         }
7076
7077         ADVANCE_BCS_BATCH(batch);
7078     }
7079
7080     if (slice_type == SLICE_TYPE_B) {
7081         BEGIN_BCS_BATCH(batch, 10);
7082         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
7083         OUT_BCS_BATCH(batch, 1);                  //Select L1
7084         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
7085         for(i = 0; i < 7; i++) {
7086             OUT_BCS_BATCH(batch, 0x80808080);
7087         }
7088         ADVANCE_BCS_BATCH(batch);
7089     }
7090 }
7091
7092 static void
7093 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
7094                                 struct encode_state *encode_state,
7095                                 struct intel_encoder_context *encoder_context,
7096                                 VAEncPictureParameterBufferH264 *pic_param,
7097                                 VAEncSliceParameterBufferH264 *slice_param,
7098                                 struct intel_batchbuffer *batch)
7099 {
7100     int i, slice_type;
7101     short weightoffsets[32 * 6];
7102
7103     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
7104
7105     if (slice_type == SLICE_TYPE_P &&
7106         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
7107         memset(weightoffsets,0,32*6 * sizeof(short));
7108         for (i = 0; i < 32; i++) {
7109             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
7110             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
7111             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
7112             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
7113             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
7114             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
7115         }
7116
7117         BEGIN_BCS_BATCH(batch, 98);
7118         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
7119         OUT_BCS_BATCH(batch, 0);
7120         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
7121
7122         ADVANCE_BCS_BATCH(batch);
7123     }
7124
7125     if (slice_type == SLICE_TYPE_B &&
7126         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
7127         memset(weightoffsets,0,32*6 * sizeof(short));
7128         for (i = 0; i < 32; i++) {
7129             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
7130             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
7131             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
7132             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
7133             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
7134             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
7135         }
7136
7137         BEGIN_BCS_BATCH(batch, 98);
7138         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
7139         OUT_BCS_BATCH(batch, 0);
7140         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
7141         ADVANCE_BCS_BATCH(batch);
7142
7143         memset(weightoffsets,0,32*6 * sizeof(short));
7144         for (i = 0; i < 32; i++) {
7145             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
7146             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
7147             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
7148             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
7149             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
7150             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
7151         }
7152
7153         BEGIN_BCS_BATCH(batch, 98);
7154         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
7155         OUT_BCS_BATCH(batch, 1);
7156         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
7157         ADVANCE_BCS_BATCH(batch);
7158     }
7159 }
7160
/*
 * Program one slice for PAK.  On the first PAK pass the per-slice
 * second-level batch is built (ref idx state, weight/offset state, slice
 * state, packed headers, terminated with MI_BATCH_BUFFER_END) and its
 * offset is recorded in avc_state->slice_batch_offset[]; on BRC re-passes
 * the recorded offset is reused so the slice commands are not rebuilt.
 * The main batch then chains to the slice batch and to the VME-generated
 * MB code for this slice, both as second-level batch buffers.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if(generic_state->curr_pak_pass == 0)
    {
        /* first pass: record where this slice's commands start in the
         * shared second-level batch, then emit them */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param,slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* terminate this slice's span of the second-level batch */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    }else
    {
        /* BRC re-pass: reuse the commands built on pass 0 */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level. */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level. */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    /* 16 DWs (64 bytes) of MB code per macroblock, starting at this
     * slice's first MB */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
7237
/*
 * Walk all slice parameter buffers and program each slice for PAK, then
 * flush the video pipeline cache.  Currently only frame-level operation is
 * supported (is_frame_level == 1), so the loops break after the first
 * slice group element; the assert(0) marks the unimplemented slice-level
 * path.
 */
static void
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int is_frame_level = 1;       /* check it for SKL,now single slice per frame */
    int has_tail = 0;             /* check it later */

    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        /* look ahead to the next slice group so the last slice of this
         * group can see its successor (NULL at end of picture) */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_mfc_avc_single_slice(ctx,
                                      encode_state,
                                      encoder_context,
                                      slice_param,
                                      next_slice_param,
                                      slice_index);
            slice_param++;
            slice_index++;

            if (is_frame_level)
                break;
            else {
                /* remove assert(0) and add other commands here */
                assert(0);
            }
        }

        if (is_frame_level)
            break;
    }

    if (has_tail) {
        /* insert a tail if required */
    }

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
/*
 * Emit the picture-level MFX command sequence for the current PAK pass:
 * pipe mode select, surface states, buffer address states, the AVC image
 * state (from the BRC-written buffer or a freshly generated one), QM/FQM
 * matrices and direct-mode state.  On BRC re-passes a conditional batch
 * buffer end is emitted first so the pass is skipped when the previous
 * pass already met the size target.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* skip this re-pass if the image status mask says the previous
         * pass already produced an acceptable frame */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    gen9_mfc_avc_pipe_mode_select(ctx,encode_state,encoder_context);
    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_reconstructed_surface),0);
    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_uncompressed_input_surface),4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx,encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx,encoder_context);

    if(generic_state->brc_enabled)
    {
        /* BRC mode: chain to the image state the BRC kernel wrote for
         * this pass (one INTEL_AVC_IMAGE_STATE_CMD_SIZE entry per pass) */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }else
    {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx,encode_state,encoder_context,&(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_fqm_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_directmode_state(ctx,encoder_context);

}
7358
/*
 * After a PAK pass, copy the MFX status registers into the internal status
 * buffer (for the application status query) and into the BRC pre-PAK
 * statistics buffer (bitstream byte counts, pass count and per-pass image
 * status control) so the BRC kernel can decide whether another pass is
 * needed.  MI_FLUSH_DW fences the reads on both sides.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    /* make sure PAK output has landed before sampling the registers */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* DW0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW1: byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW2: number of PAK passes executed so far */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* DW4 + pass: image status control register for this pass */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
7418
7419 static void
7420 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7421                           struct intel_encoder_context *encoder_context)
7422 {
7423     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7424     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
7425     unsigned int rate_control_mode = encoder_context->rate_control_mode;
7426
7427     switch (rate_control_mode & 0x7f) {
7428     case VA_RC_CBR:
7429         generic_state->internal_rate_mode = VA_RC_CBR;
7430         break;
7431
7432     case VA_RC_VBR:
7433         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7434         break;
7435
7436     case VA_RC_CQP:
7437     default:
7438         generic_state->internal_rate_mode = VA_RC_CQP;
7439         break;
7440     }
7441
7442     if (encoder_context->quality_level == 0)\r
7443         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;\r
7444 }
7445
/*
 * Prepare all surfaces and buffers the PAK stage needs for this frame:
 * validate slice parameters and detect whether the in-loop deblocking
 * filter is enabled, bind the reconstructed / input / reference surfaces
 * and their direct-MV buffers as GPE resources, rebuild the per-slice
 * second-level batch buffer, and (re)allocate the PAK row-store and MB
 * status scratch buffers.
 *
 * Returns VA_STATUS_SUCCESS, or an error code on surface-check or
 * allocation failure.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                     struct encode_state *encode_state,
                     struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    struct gen9_surface_avc *avc_priv_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;
    struct avc_surface_param surface_param;

    /* update the parameter and check slice parameter */
    /* deblocking is enabled for the frame if any slice does not fully
     * disable it (disable_deblocking_filter_idc != 1) */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param,0,sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
       /* the current frame's DMV buffers occupy the last two slots;
        * rebind them and record the top-field POCs for direct mode */
       avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = 0;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = 0;
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2]);
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
       avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
       avc_priv_surface->frame_store_id = 0;
       avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
       avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
       avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);


    /* with deblocking the hardware writes the post-deblocked output,
     * otherwise the pre-deblocking output, into the reconstructed surface */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2*i] = 0;
        avc_state->top_field_poc[2*i+1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                obj_surface,encoder_context,
                &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt+1;
        }else
        {
            /* reference list is packed from the front; stop at first gap */
            break;
        }
    }

    /* rebuild the per-slice second-level batch buffer for this frame */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
    {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0;i < MAX_AVC_SLICE_NUM;i++) {
        avc_state->slice_batch_offset[i] = 0;
    }

    /* PAK row-store / status scratch buffers, sized by frame width
     * (and height for the MB status buffer) in macroblocks */

    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_intra_row_store_scratch_buffer,
                                 size,
                                "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                 size,
                                "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                 size,
                                "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_pak_mb_status_buffer,
                                 size,
                                "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
7635
/*
 * Run the PAK stage for one picture: prepare all resources, then execute
 * num_pak_passes PAK passes on the BSD ring (picture-level commands,
 * slice-level commands, status readback per pass; with BRC enabled later
 * passes are skipped in hardware via the conditional batch buffer end
 * emitted at picture level).  Finally flush the batch and advance the
 * frame counters.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* force BSD ring 0 when a second BSD ring exists */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

         if (generic_state->curr_pak_pass == 0) {
             /* Initialize the avc Image Ctrl reg for the first pass,write 0 to status/control register, is it needed in AVC? */
             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
             struct encoder_status_buffer_internal *status_buffer;

             status_buffer = &(avc_ctx->status_buffer);
             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
             mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
             mi_load_reg_imm.data = 0;
             gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
         }
         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
         gen9_avc_read_mfc_status(ctx, encoder_context);

    }

    /* the slice batch is rebuilt each frame; release it now */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
    {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
7696
7697 static VAStatus
7698 gen9_avc_pak_pipeline(VADriverContextP ctx,
7699                       VAProfile profile,
7700                       struct encode_state *encode_state,
7701                       struct intel_encoder_context *encoder_context)
7702 {
7703     VAStatus vaStatus;
7704
7705     switch (profile) {
7706     case VAProfileH264ConstrainedBaseline:
7707     case VAProfileH264Main:
7708     case VAProfileH264High:
7709         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7710         break;
7711
7712     default:
7713         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
7714         break;
7715     }
7716
7717     return vaStatus;
7718 }
7719
7720 static void
7721 gen9_avc_pak_context_destroy(void * context)
7722 {
7723     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7724     struct generic_encoder_context * generic_ctx;
7725     struct i965_avc_encoder_context * avc_ctx;
7726     int i = 0;
7727
7728     if (!pak_context)
7729         return;
7730
7731     generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
7732     avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
7733
7734     // other things
7735     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7736     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7737     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7738     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7739
7740     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7741     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7742     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7743     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7744     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7745
7746     for(i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++)
7747     {
7748         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7749     }
7750
7751     for(i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++)
7752     {
7753         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7754     }
7755
7756     if (avc_ctx->pres_slice_batch_buffer_2nd_level)
7757     {
7758         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7759         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7760     }
7761
7762 }
7763
7764 static VAStatus
7765 gen9_avc_get_coded_status(VADriverContextP ctx,
7766                           struct intel_encoder_context *encoder_context,
7767                           struct i965_coded_buffer_segment *coded_buf_seg)
7768 {
7769     struct encoder_status *avc_encode_status;
7770
7771     if (!encoder_context || !coded_buf_seg)
7772         return VA_STATUS_ERROR_INVALID_BUFFER;
7773
7774     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7775     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7776
7777     return VA_STATUS_SUCCESS;
7778 }
7779
7780 Bool
7781 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7782 {
7783     /* VME & PAK share the same context */
7784     struct i965_driver_data *i965 = i965_driver_data(ctx);
7785     struct encoder_vme_mfc_context * vme_context = NULL;
7786     struct generic_encoder_context * generic_ctx = NULL;
7787     struct i965_avc_encoder_context * avc_ctx = NULL;
7788     struct generic_enc_codec_state * generic_state = NULL;
7789     struct avc_enc_state * avc_state = NULL;
7790     struct encoder_status_buffer_internal *status_buffer;
7791     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7792
7793     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7794     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7795     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7796     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7797     avc_state = calloc(1, sizeof(struct avc_enc_state));
7798
7799     if(!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7800         goto allocate_structure_failed;
7801
7802     memset(vme_context,0,sizeof(struct encoder_vme_mfc_context));
7803     memset(generic_ctx,0,sizeof(struct generic_encoder_context));
7804     memset(avc_ctx,0,sizeof(struct i965_avc_encoder_context));
7805     memset(generic_state,0,sizeof(struct generic_enc_codec_state));
7806     memset(avc_state,0,sizeof(struct avc_enc_state));
7807
7808     encoder_context->vme_context = vme_context;
7809     vme_context->generic_enc_ctx = generic_ctx;
7810     vme_context->private_enc_ctx = avc_ctx;
7811     vme_context->generic_enc_state = generic_state;
7812     vme_context->private_enc_state = avc_state;
7813
7814     if (IS_SKL(i965->intel.device_info)||
7815         IS_BXT(i965->intel.device_info)) {
7816         generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7817         generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7818     }
7819     else if (IS_KBL(i965->intel.device_info) ||
7820              IS_GLK(i965->intel.device_info)) {
7821         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7822         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7823     }
7824     else
7825         goto allocate_structure_failed;
7826
7827     /* initialize misc ? */
7828     avc_ctx->ctx = ctx;
7829     generic_ctx->use_hw_scoreboard = 1;
7830     generic_ctx->use_hw_non_stalling_scoreboard = 1;
7831
7832     /* initialize generic state */
7833
7834     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7835     generic_state->preset = INTEL_PRESET_RT_SPEED;
7836     generic_state->seq_frame_number = 0;
7837     generic_state->total_frame_number = 0;
7838     generic_state->frame_type = 0;
7839     generic_state->first_frame = 1;
7840
7841     generic_state->frame_width_in_pixel = 0;
7842     generic_state->frame_height_in_pixel = 0;
7843     generic_state->frame_width_in_mbs = 0;
7844     generic_state->frame_height_in_mbs = 0;
7845     generic_state->frame_width_4x = 0;
7846     generic_state->frame_height_4x = 0;
7847     generic_state->frame_width_16x = 0;
7848     generic_state->frame_height_16x = 0;
7849     generic_state->frame_width_32x = 0;
7850     generic_state->downscaled_width_4x_in_mb = 0;
7851     generic_state->downscaled_height_4x_in_mb = 0;
7852     generic_state->downscaled_width_16x_in_mb = 0;
7853     generic_state->downscaled_height_16x_in_mb = 0;
7854     generic_state->downscaled_width_32x_in_mb = 0;
7855     generic_state->downscaled_height_32x_in_mb = 0;
7856
7857     generic_state->hme_supported = 1;
7858     generic_state->b16xme_supported = 1;
7859     generic_state->b32xme_supported = 0;
7860     generic_state->hme_enabled = 0;
7861     generic_state->b16xme_enabled = 0;
7862     generic_state->b32xme_enabled = 0;
7863     generic_state->brc_distortion_buffer_supported = 1;
7864     generic_state->brc_constant_buffer_supported = 0;
7865
7866
7867     generic_state->frame_rate = 30;
7868     generic_state->brc_allocated = 0;
7869     generic_state->brc_inited = 0;
7870     generic_state->brc_need_reset = 0;
7871     generic_state->is_low_delay = 0;
7872     generic_state->brc_enabled = 0;//default
7873     generic_state->internal_rate_mode = 0;
7874     generic_state->curr_pak_pass = 0;
7875     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7876     generic_state->is_first_pass = 1;
7877     generic_state->is_last_pass = 0;
7878     generic_state->mb_brc_enabled = 0; // enable mb brc
7879     generic_state->brc_roi_enable = 0;
7880     generic_state->brc_dirty_roi_enable = 0;
7881     generic_state->skip_frame_enbale = 0;
7882
7883     generic_state->target_bit_rate = 0;
7884     generic_state->max_bit_rate = 0;
7885     generic_state->min_bit_rate = 0;
7886     generic_state->init_vbv_buffer_fullness_in_bit = 0;
7887     generic_state->vbv_buffer_size_in_bit = 0;
7888     generic_state->frames_per_100s = 0;
7889     generic_state->gop_size = 0;
7890     generic_state->gop_ref_distance = 0;
7891     generic_state->brc_target_size = 0;
7892     generic_state->brc_mode = 0;
7893     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7894     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7895     generic_state->brc_init_reset_buf_size_in_bits = 0;
7896     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7897     generic_state->frames_per_window_size = 0;//default
7898     generic_state->target_percentage = 0;
7899
7900     generic_state->avbr_curracy = 0;
7901     generic_state->avbr_convergence = 0;
7902
7903     generic_state->num_skip_frames = 0;
7904     generic_state->size_skip_frames = 0;
7905
7906     generic_state->num_roi = 0;
7907     generic_state->max_delta_qp = 0;
7908     generic_state->min_delta_qp = 0;
7909
7910     if (encoder_context->rate_control_mode != VA_RC_NONE &&
7911         encoder_context->rate_control_mode != VA_RC_CQP) {
7912         generic_state->brc_enabled = 1;
7913         generic_state->brc_distortion_buffer_supported = 1;
7914         generic_state->brc_constant_buffer_supported = 1;
7915         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7916     }
7917     /*avc state initialization */
7918     avc_state->mad_enable = 0;
7919     avc_state->mb_disable_skip_map_enable = 0;
7920     avc_state->sfd_enable = 1;//default
7921     avc_state->sfd_mb_enable = 1;//set it true
7922     avc_state->adaptive_search_window_enable = 1;//default
7923     avc_state->mb_qp_data_enable = 0;
7924     avc_state->intra_refresh_i_enable = 0;
7925     avc_state->min_max_qp_enable = 0;
7926     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
7927
7928     //external input
7929     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7930     avc_state->ftq_skip_threshold_lut_input_enable = 0;
7931     avc_state->ftq_override = 0;
7932
7933     avc_state->direct_bias_adjustment_enable = 0;
7934     avc_state->global_motion_bias_adjustment_enable = 0;
7935     avc_state->disable_sub_mb_partion = 0;
7936     avc_state->arbitrary_num_mbs_in_slice = 0;
7937     avc_state->adaptive_transform_decision_enable = 0;//default
7938     avc_state->skip_check_disable = 0;
7939     avc_state->tq_enable = 0;
7940     avc_state->enable_avc_ildb = 0;
7941     avc_state->mbaff_flag = 0;
7942     avc_state->enable_force_skip = 1;//default
7943     avc_state->rc_panic_enable = 1;//default
7944     avc_state->suppress_recon_enable = 1;//default
7945
7946     avc_state->ref_pic_select_list_supported = 1;
7947     avc_state->mb_brc_supported = 1;//?,default
7948     avc_state->multi_pre_enable = 1;//default
7949     avc_state->ftq_enable = 1;//default
7950     avc_state->caf_supported = 1; //default
7951     avc_state->caf_enable = 0;
7952     avc_state->caf_disable_hd = 1;//default
7953     avc_state->skip_bias_adjustment_supported = 1;//default
7954
7955     avc_state->adaptive_intra_scaling_enable = 1;//default
7956     avc_state->old_mode_cost_enable = 0;//default
7957     avc_state->multi_ref_qp_enable = 1;//default
7958     avc_state->weighted_ref_l0_enable = 1;//default
7959     avc_state->weighted_ref_l1_enable = 1;//default
7960     avc_state->weighted_prediction_supported = 0;
7961     avc_state->brc_split_enable = 0;
7962     avc_state->slice_level_report_supported = 0;
7963
7964     avc_state->fbr_bypass_enable = 1;//default
7965     avc_state->field_scaling_output_interleaved = 0;
7966     avc_state->mb_variance_output_enable = 0;
7967     avc_state->mb_pixel_average_output_enable = 0;
7968     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7969     avc_state->mbenc_curbe_set_in_brc_update = 0;
7970     avc_state->rounding_inter_enable = 1; //default
7971     avc_state->adaptive_rounding_inter_enable = 1;//default
7972
7973     avc_state->mbenc_i_frame_dist_in_use = 0;
7974     avc_state->mb_status_supported = 1; //set in intialization for gen9
7975     avc_state->mb_status_enable = 0;
7976     avc_state->mb_vproc_stats_enable = 0;
7977     avc_state->flatness_check_enable = 0;
7978     avc_state->flatness_check_supported = 1;//default
7979     avc_state->block_based_skip_enable = 0;
7980     avc_state->use_widi_mbenc_kernel = 0;
7981     avc_state->kernel_trellis_enable = 0;
7982     avc_state->generic_reserved = 0;
7983
7984     avc_state->rounding_value = 0;
7985     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7986     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7987     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7988     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7989     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7990     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7991     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7992     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7993     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7994
7995     memset(avc_state->non_ftq_skip_threshold_lut,0,AVC_QP_MAX*sizeof(uint8_t));
7996     memset(avc_state->ftq_skip_threshold_lut,0,AVC_QP_MAX*sizeof(uint8_t));
7997     memset(avc_state->lamda_value_lut,0,AVC_QP_MAX*2*sizeof(uint32_t));
7998
7999     avc_state->intra_refresh_qp_threshold = 0;
8000     avc_state->trellis_flag = 0;
8001     avc_state->hme_mv_cost_scaling_factor = 0;
8002     avc_state->slice_height = 1;
8003     avc_state->slice_num = 1;
8004     memset(avc_state->dist_scale_factor_list0,0,32*sizeof(uint32_t));
8005     avc_state->bi_weight = 0;
8006
8007     avc_state->lambda_table_enable = 0;
8008
8009
8010     if (IS_SKL(i965->intel.device_info)||
8011         IS_BXT(i965->intel.device_info)) {
8012         avc_state->brc_const_data_surface_width = 64;
8013         avc_state->brc_const_data_surface_height = 44;
8014     }
8015     else if (IS_KBL(i965->intel.device_info)||
8016              IS_GLK(i965->intel.device_info)) {
8017         avc_state->brc_const_data_surface_width = 64;
8018         avc_state->brc_const_data_surface_height = 53;
8019         //gen95
8020         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
8021         avc_state->extended_mv_cost_range_enable = 0;
8022         avc_state->reserved_g95 = 0;
8023         avc_state->mbenc_brc_buffer_size = 128;
8024         avc_state->kernel_trellis_enable = 1;
8025         avc_state->lambda_table_enable = 1;
8026         avc_state->brc_split_enable = 1;
8027     }
8028
8029     avc_state->num_refs[0] = 0;
8030     avc_state->num_refs[1] = 0;
8031     memset(avc_state->list_ref_idx,0,32*2*sizeof(uint32_t));
8032     memset(avc_state->top_field_poc,0,NUM_MFC_AVC_DMV_BUFFERS*sizeof(int32_t));
8033     avc_state->tq_rounding = 0;
8034     avc_state->zero_mv_threshold = 0;
8035     avc_state->slice_second_levle_batch_buffer_in_use = 0;
8036
8037     //1. seq/pic/slice
8038
8039     /* the definition of status buffer offset for Encoder */
8040
8041     status_buffer = &avc_ctx->status_buffer;
8042     memset(status_buffer, 0,sizeof(struct encoder_status_buffer_internal));
8043
8044     status_buffer->base_offset = base_offset;
8045     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
8046     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
8047     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
8048     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
8049     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
8050     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
8051
8052     status_buffer->status_buffer_size = sizeof(struct encoder_status);
8053     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
8054     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
8055     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
8056     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
8057     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
8058
8059     gen9_avc_kernel_init(ctx,encoder_context);
8060     encoder_context->vme_context = vme_context;
8061     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
8062     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
8063
8064     return true;
8065
8066 allocate_structure_failed:
8067
8068     free(vme_context);
8069     free(generic_ctx);
8070     free(avc_ctx);
8071     free(generic_state);
8072     free(avc_state);
8073     return false;
8074 }
8075
8076 Bool
8077 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
8078 {
8079     /* VME & PAK share the same context */
8080     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8081
8082     if (!pak_context)
8083         return false;
8084
8085     encoder_context->mfc_context = pak_context;
8086     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
8087     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
8088     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
8089     encoder_context->get_status = gen9_avc_get_coded_status;
8090     return true;
8091 }