OSDN Git Service

1477cbfa72f17fff1849df14bcfbb2372d1cb760
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_avc_encoder.c
1 /*
2  * Copyright (C) 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
46
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
53
54 #define MAX_URB_SIZE                    4096 /* In register */
55 #define NUM_KERNELS_PER_GPE_CONTEXT     1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 = 1 << 4) */
58
59 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
60         if (bo) {                                                       \
61             OUT_BCS_RELOC64(batch,                                        \
62                             bo,                                         \
63                             I915_GEM_DOMAIN_INSTRUCTION,                \
64                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
65                             delta);                                     \
66         } else {                                                        \
67             OUT_BCS_BATCH(batch, 0);                                    \
68             OUT_BCS_BATCH(batch, 0);                                    \
69         }                                                               \
70     } while (0)
71
72 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
73         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
74         OUT_BCS_BATCH(batch, attr);                             \
75     } while (0)
76
77 static const uint32_t qm_flat[16] = {
78     0x10101010, 0x10101010, 0x10101010, 0x10101010,
79     0x10101010, 0x10101010, 0x10101010, 0x10101010,
80     0x10101010, 0x10101010, 0x10101010, 0x10101010,
81     0x10101010, 0x10101010, 0x10101010, 0x10101010
82 };
83
84 static const uint32_t fqm_flat[32] = {
85     0x10001000, 0x10001000, 0x10001000, 0x10001000,
86     0x10001000, 0x10001000, 0x10001000, 0x10001000,
87     0x10001000, 0x10001000, 0x10001000, 0x10001000,
88     0x10001000, 0x10001000, 0x10001000, 0x10001000,
89     0x10001000, 0x10001000, 0x10001000, 0x10001000,
90     0x10001000, 0x10001000, 0x10001000, 0x10001000,
91     0x10001000, 0x10001000, 0x10001000, 0x10001000,
92     0x10001000, 0x10001000, 0x10001000, 0x10001000
93 };
94
95 static const unsigned int slice_type_kernel[3] = {1,2,0};
96
97 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data =
98 {
99     // unsigned int 0
100     {
101             0
102     },
103
104     // unsigned int 1
105     {
106             0
107     },
108
109     // unsigned int 2
110     {
111             0
112     },
113
114     // unsigned int 3
115     {
116             0
117     },
118
119     // unsigned int 4
120     {
121             0
122     },
123
124     // unsigned int 5
125     {
126             0
127     },
128
129     // unsigned int 6
130     {
131             0
132     },
133
134     // unsigned int 7
135     {
136             0
137     },
138
139     // unsigned int 8
140     {
141             0,
142             0
143     },
144
145     // unsigned int 9
146     {
147             0,
148             0
149     },
150
151     // unsigned int 10
152     {
153             0,
154             0
155     },
156
157     // unsigned int 11
158     {
159             0,
160             1
161     },
162
163     // unsigned int 12
164     {
165             51,
166             0
167     },
168
169     // unsigned int 13
170     {
171             40,
172             60,
173             80,
174             120
175     },
176
177     // unsigned int 14
178     {
179             35,
180             60,
181             80,
182             120
183     },
184
185     // unsigned int 15
186     {
187             40,
188             60,
189             90,
190             115
191     },
192
193     // unsigned int 16
194     {
195             0,
196             0,
197             0,
198             0
199     },
200
201     // unsigned int 17
202     {
203             0,
204             0,
205             0,
206             0
207     },
208
209     // unsigned int 18
210     {
211             0,
212             0,
213             0,
214             0
215     },
216
217     // unsigned int 19
218     {
219             0,
220             0,
221             0,
222             0
223     },
224
225     // unsigned int 20
226     {
227             0,
228             0,
229             0,
230             0
231     },
232
233     // unsigned int 21
234     {
235             0,
236             0,
237             0,
238             0
239     },
240
241     // unsigned int 22
242     {
243             0,
244             0,
245             0,
246             0
247     },
248
249     // unsigned int 23
250     {
251             0
252     }
253 };
254
255 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data =
256 {
257     // unsigned int 0
258     {
259             0
260     },
261
262     // unsigned int 1
263     {
264             0
265     },
266
267     // unsigned int 2
268     {
269             0
270     },
271
272     // unsigned int 3
273     {
274             10,
275             50
276     },
277
278     // unsigned int 4
279     {
280             100,
281             150
282     },
283
284     // unsigned int 5
285     {
286             0,
287             0,
288             0,
289             0
290     },
291
292     // unsigned int 6
293     {
294             0,
295             0,
296             0,
297             0,
298             0,
299             0
300     },
301
302     // unsigned int 7
303     {
304             0
305     },
306
307     // unsigned int 8
308     {
309             1,
310             1,
311             3,
312             2
313     },
314
315     // unsigned int 9
316     {
317             1,
318             40,
319             5,
320             5
321     },
322
323     // unsigned int 10
324     {
325             3,
326             1,
327             7,
328             18
329     },
330
331     // unsigned int 11
332     {
333             25,
334             37,
335             40,
336             75
337     },
338
339     // unsigned int 12
340     {
341             97,
342             103,
343             125,
344             160
345     },
346
347     // unsigned int 13
348     {
349             -3,
350             -2,
351             -1,
352             0
353     },
354
355     // unsigned int 14
356     {
357             1,
358             2,
359             3,
360             0xff
361     },
362
363     // unsigned int 15
364     {
365             0,
366             0,
367             0,
368             0
369     },
370
371     // unsigned int 16
372     {
373             0
374     },
375
376     // unsigned int 17
377     {
378             0
379     },
380
381     // unsigned int 18
382     {
383             0
384     },
385
386     // unsigned int 19
387     {
388             0
389     },
390
391     // unsigned int 20
392     {
393             0
394     },
395
396     // unsigned int 21
397     {
398             0
399     },
400
401     // unsigned int 22
402     {
403             0
404     },
405
406     // unsigned int 23
407     {
408             0
409     },
410
411 };
412
413 static void
414 gen9_avc_update_misc_parameters(VADriverContextP ctx,
415                                 struct encode_state *encode_state,
416                                 struct intel_encoder_context *encoder_context)
417 {
418     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
419     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
420     int i;
421
422     /* brc */
423     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
424
425     generic_state->brc_need_reset = encoder_context->brc.need_reset;
426
427     if (generic_state->internal_rate_mode == VA_RC_CBR) {
428         generic_state->min_bit_rate = generic_state->max_bit_rate;
429         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
430
431         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
432             generic_state->target_bit_rate = generic_state->max_bit_rate;
433             generic_state->brc_need_reset = 1;
434         }
435     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
436         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
437         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
438
439         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
440             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
441             generic_state->brc_need_reset = 1;
442         }
443     }
444
445     /*  frame rate */
446     if (generic_state->internal_rate_mode != VA_RC_CQP)
447     {
448         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100/encoder_context->brc.framerate[0].den ;
449         generic_state->frame_rate = encoder_context->brc.framerate[0].num/encoder_context->brc.framerate[0].den ;
450         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate /1000);// brc.windows size in ms as the unit
451     }else
452     {
453         generic_state->frames_per_100s = 30 * 100;
454         generic_state->frame_rate = 30 ;
455         generic_state->frames_per_window_size = 30;
456     }
457
458     /*  HRD */
459     if (generic_state->internal_rate_mode != VA_RC_CQP)
460     {
461         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
462         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
463     }
464
465     /* ROI */
466     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
467     if (generic_state->num_roi > 0) {
468         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
469         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
470
471         for (i = 0; i < generic_state->num_roi; i++) {
472             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
473             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
474             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
475             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
476             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
477
478             generic_state->roi[i].left /= 16;
479             generic_state->roi[i].right /= 16;
480             generic_state->roi[i].top /= 16;
481             generic_state->roi[i].bottom /= 16;
482         }
483     }
484
485 }
486
487 static bool
488 intel_avc_get_kernel_header_and_size(void *pvbinary,
489                                      int binary_size,
490                                      INTEL_GENERIC_ENC_OPERATION operation,
491                                      int krnstate_idx,
492                                      struct i965_kernel *ret_kernel)
493 {
494     typedef uint32_t BIN_PTR[4];
495
496     char *bin_start;
497     gen9_avc_encoder_kernel_header      *pkh_table;
498     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
499     int next_krnoffset;
500
501     if (!pvbinary || !ret_kernel)
502         return false;
503
504     bin_start = (char *)pvbinary;
505     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
506     pinvalid_entry = &(pkh_table->static_detection) + 1;
507     next_krnoffset = binary_size;
508
509     if (operation == INTEL_GENERIC_ENC_SCALING4X)
510     {
511         pcurr_header = &pkh_table->ply_dscale_ply;
512     }
513     else if (operation == INTEL_GENERIC_ENC_SCALING2X)
514     {
515         pcurr_header = &pkh_table->ply_2xdscale_ply;
516     }
517     else if (operation == INTEL_GENERIC_ENC_ME)
518     {
519         pcurr_header = &pkh_table->me_p;
520     }
521     else if (operation == INTEL_GENERIC_ENC_BRC)
522     {
523         pcurr_header = &pkh_table->frame_brc_init;
524     }
525     else if (operation == INTEL_GENERIC_ENC_MBENC)
526     {
527         pcurr_header = &pkh_table->mbenc_quality_I;
528     }
529     else if (operation == INTEL_GENERIC_ENC_WP)
530     {
531         pcurr_header = &pkh_table->wp;
532     }
533     else if (operation == INTEL_GENERIC_ENC_SFD)
534     {
535         pcurr_header = &pkh_table->static_detection;
536     }
537     else
538     {
539         return false;
540     }
541
542     pcurr_header += krnstate_idx;
543     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
544
545     pnext_header = (pcurr_header + 1);
546     if (pnext_header < pinvalid_entry)
547     {
548         next_krnoffset = pnext_header->kernel_start_pointer << 6;
549     }
550     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
551
552     return true;
553 }
554 static void
555 gen9_free_surfaces_avc(void **data)
556 {
557     struct gen9_surface_avc *avc_surface;
558
559     if (!data || !*data)
560         return;
561
562     avc_surface = *data;
563
564     if (avc_surface->scaled_4x_surface_obj) {
565         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
566         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
567         avc_surface->scaled_4x_surface_obj = NULL;
568     }
569
570     if (avc_surface->scaled_16x_surface_obj) {
571         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
572         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
573         avc_surface->scaled_16x_surface_obj = NULL;
574     }
575
576     if (avc_surface->scaled_32x_surface_obj) {
577         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
578         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
579         avc_surface->scaled_32x_surface_obj = NULL;
580     }
581
582     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
583     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
584     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
585
586     dri_bo_unreference(avc_surface->dmv_top);
587     avc_surface->dmv_top = NULL;
588     dri_bo_unreference(avc_surface->dmv_bottom);
589     avc_surface->dmv_bottom = NULL;
590
591     free(avc_surface);
592
593     *data = NULL;
594
595     return;
596 }
597
598 static VAStatus
599 gen9_avc_init_check_surfaces(VADriverContextP ctx,
600                              struct object_surface *obj_surface,
601                              struct intel_encoder_context *encoder_context,
602                              struct avc_surface_param *surface_param)
603 {
604     struct i965_driver_data *i965 = i965_driver_data(ctx);
605     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
606     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
607     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
608
609     struct gen9_surface_avc *avc_surface;
610     int downscaled_width_4x, downscaled_height_4x;
611     int downscaled_width_16x, downscaled_height_16x;
612     int downscaled_width_32x, downscaled_height_32x;
613     int size = 0;
614     unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width,16) / 16;
615     unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height,16) / 16;
616     unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
617     int allocate_flag = 1;
618     int width,height;
619
620     if (!obj_surface || !obj_surface->bo)
621         return VA_STATUS_ERROR_INVALID_SURFACE;
622
623     if (obj_surface->private_data) {
624         return VA_STATUS_SUCCESS;
625     }
626
627     avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
628
629     if (!avc_surface)
630         return VA_STATUS_ERROR_ALLOCATION_FAILED;
631
632     avc_surface->ctx = ctx;
633     obj_surface->private_data = avc_surface;
634     obj_surface->free_private_data = gen9_free_surfaces_avc;
635
636     downscaled_width_4x = generic_state->frame_width_4x;
637     downscaled_height_4x = generic_state->frame_height_4x;
638
639     i965_CreateSurfaces(ctx,
640                         downscaled_width_4x,
641                         downscaled_height_4x,
642                         VA_RT_FORMAT_YUV420,
643                         1,
644                         &avc_surface->scaled_4x_surface_id);
645
646     avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
647
648     if (!avc_surface->scaled_4x_surface_obj) {
649         return VA_STATUS_ERROR_ALLOCATION_FAILED;
650     }
651
652     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
653                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
654
655     downscaled_width_16x = generic_state->frame_width_16x;
656     downscaled_height_16x = generic_state->frame_height_16x;
657     i965_CreateSurfaces(ctx,
658                         downscaled_width_16x,
659                         downscaled_height_16x,
660                         VA_RT_FORMAT_YUV420,
661                         1,
662                         &avc_surface->scaled_16x_surface_id);
663     avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
664
665     if (!avc_surface->scaled_16x_surface_obj) {
666         return VA_STATUS_ERROR_ALLOCATION_FAILED;
667     }
668
669     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
670                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
671
672     if(generic_state->b32xme_supported ||
673        generic_state->b32xme_enabled)
674     {
675         downscaled_width_32x = generic_state->frame_width_32x;
676         downscaled_height_32x = generic_state->frame_height_32x;
677         i965_CreateSurfaces(ctx,
678                             downscaled_width_32x,
679                             downscaled_height_32x,
680                             VA_RT_FORMAT_YUV420,
681                             1,
682                             &avc_surface->scaled_32x_surface_id);
683         avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
684
685         if (!avc_surface->scaled_32x_surface_obj) {
686             return VA_STATUS_ERROR_ALLOCATION_FAILED;
687         }
688
689         i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
690                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
691     }
692
693     /*mb code and mv data for each frame*/
694     size = frame_mb_nums * 16 * 4;
695     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
696         &avc_surface->res_mb_code_surface,
697         ALIGN(size,0x1000),
698         "mb code buffer");
699     if (!allocate_flag)
700         goto failed_allocation;
701
702     size = frame_mb_nums * 32 * 4;
703     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
704         &avc_surface->res_mv_data_surface,
705         ALIGN(size,0x1000),
706         "mv data buffer");
707     if (!allocate_flag)
708         goto failed_allocation;
709
710     /* ref pic list*/
711     if(avc_state->ref_pic_select_list_supported)
712     {
713         width = ALIGN(frame_width_in_mbs * 8,64);
714         height= frame_height_in_mbs ;
715         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
716                                      &avc_surface->res_ref_pic_select_surface,
717                                      width, height,
718                                      width,
719                                      "Ref pic select list buffer");
720         if (!allocate_flag)
721             goto failed_allocation;
722     }
723
724     /*direct mv*/
725     avc_surface->dmv_top =
726         dri_bo_alloc(i965->intel.bufmgr,
727         "direct mv top Buffer",
728         68 * frame_mb_nums,
729         64);
730     avc_surface->dmv_bottom =
731         dri_bo_alloc(i965->intel.bufmgr,
732         "direct mv bottom Buffer",
733         68 * frame_mb_nums,
734         64);
735     assert(avc_surface->dmv_top);
736     assert(avc_surface->dmv_bottom);
737
738     return VA_STATUS_SUCCESS;
739
740 failed_allocation:
741     return VA_STATUS_ERROR_ALLOCATION_FAILED;
742 }
743
744 static VAStatus
745 gen9_avc_allocate_resources(VADriverContextP ctx,
746                             struct encode_state *encode_state,
747                             struct intel_encoder_context *encoder_context)
748 {
749     struct i965_driver_data *i965 = i965_driver_data(ctx);
750     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
751     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
752     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
753     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
754     unsigned int size  = 0;
755     unsigned int width  = 0;
756     unsigned int height  = 0;
757     unsigned char * data  = NULL;
758     int allocate_flag = 1;
759     int i = 0;
760
761     /*all the surface/buffer are allocated here*/
762
763     /*second level batch buffer for image state write when cqp etc*/
764     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
765     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
766     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
767                              &avc_ctx->res_image_state_batch_buffer_2nd_level,
768                              ALIGN(size,0x1000),
769                              "second levle batch (image state write) buffer");
770     if (!allocate_flag)
771         goto failed_allocation;
772
773     /* scaling related surface   */
774     if(avc_state->mb_status_supported)
775     {
776         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
777         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023)&~0x3ff;
778         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
779                                  &avc_ctx->res_mb_status_buffer,
780                                  ALIGN(size,0x1000),
781                                  "MB statistics output buffer");
782         if (!allocate_flag)
783             goto failed_allocation;
784         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
785     }
786
787     if(avc_state->flatness_check_supported)
788     {
789         width = generic_state->frame_width_in_mbs * 4;
790         height= generic_state->frame_height_in_mbs * 4;
791         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
792         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
793                                      &avc_ctx->res_flatness_check_surface,
794                                      width, height,
795                                      ALIGN(width,64),
796                                      "Flatness check buffer");
797         if (!allocate_flag)
798             goto failed_allocation;
799     }
800     /* me related surface */
801     width = generic_state->downscaled_width_4x_in_mb * 8;
802     height= generic_state->downscaled_height_4x_in_mb * 4 * 10;
803     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
804     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
805                                  &avc_ctx->s4x_memv_distortion_buffer,
806                                  width, height,
807                                  ALIGN(width,64),
808                                  "4x MEMV distortion buffer");
809     if (!allocate_flag)
810         goto failed_allocation;
811     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
812
813     width = (generic_state->downscaled_width_4x_in_mb + 7)/8 * 64;
814     height= (generic_state->downscaled_height_4x_in_mb + 1)/2 * 8;
815     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
816     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
817                                  &avc_ctx->s4x_memv_min_distortion_brc_buffer,
818                                  width, height,
819                                  width,
820                                  "4x MEMV min distortion brc buffer");
821     if (!allocate_flag)
822         goto failed_allocation;
823     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
824
825
826     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32,64);
827     height= generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
828     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
829     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
830                                  &avc_ctx->s4x_memv_data_buffer,
831                                  width, height,
832                                  width,
833                                  "4x MEMV data buffer");
834     if (!allocate_flag)
835         goto failed_allocation;
836     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
837
838
839     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32,64);
840     height= generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
841     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
842     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
843                                  &avc_ctx->s16x_memv_data_buffer,
844                                  width, height,
845                                  width,
846                                  "16x MEMV data buffer");
847     if (!allocate_flag)
848         goto failed_allocation;
849     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
850
851
852     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32,64);
853     height= generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
854     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
855     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
856                                  &avc_ctx->s32x_memv_data_buffer,
857                                  width, height,
858                                  width,
859                                  "32x MEMV data buffer");
860     if (!allocate_flag)
861         goto failed_allocation;
862     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
863
864
865     if(!generic_state->brc_allocated)
866     {
867         /*brc related surface */
868         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
869         size = 864;
870         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
871                                  &avc_ctx->res_brc_history_buffer,
872                                  ALIGN(size,0x1000),
873                                  "brc history buffer");
874         if (!allocate_flag)
875             goto failed_allocation;
876
877         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
878         size = 64;//44
879         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
880                                  &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
881                                  ALIGN(size,0x1000),
882                                  "brc pak statistic buffer");
883         if (!allocate_flag)
884             goto failed_allocation;
885
886         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
887         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
888         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
889                                  &avc_ctx->res_brc_image_state_read_buffer,
890                                  ALIGN(size,0x1000),
891                                  "brc image state read buffer");
892         if (!allocate_flag)
893             goto failed_allocation;
894
895         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
896         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
897         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
898                                  &avc_ctx->res_brc_image_state_write_buffer,
899                                  ALIGN(size,0x1000),
900                                  "brc image state write buffer");
901         if (!allocate_flag)
902             goto failed_allocation;
903
904         width = ALIGN(64,64);
905         height= 44;
906         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
907         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
908                                      &avc_ctx->res_brc_const_data_buffer,
909                                      width, height,
910                                      width,
911                                      "brc const data buffer");
912         if (!allocate_flag)
913             goto failed_allocation;
914
915         if(generic_state->brc_distortion_buffer_supported)
916         {
917             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8,64);
918             height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
919             width = (generic_state->downscaled_width_4x_in_mb + 7)/8 * 64;
920             height= (generic_state->downscaled_height_4x_in_mb + 1)/2 * 8;
921             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
922             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
923                                          &avc_ctx->res_brc_dist_data_surface,
924                                          width, height,
925                                          width,
926                                          "brc dist data buffer");
927             if (!allocate_flag)
928                 goto failed_allocation;
929             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
930         }
931
932         if(generic_state->brc_roi_enable)
933         {
934             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16,64);
935             height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
936             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
937             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
938                                          &avc_ctx->res_mbbrc_roi_surface,
939                                          width, height,
940                                          width,
941                                          "mbbrc roi buffer");
942             if (!allocate_flag)
943                 goto failed_allocation;
944             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
945         }
946
947         /*mb qp in mb brc*/
948         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4,64);
949         height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
950         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
951         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
952                                      &avc_ctx->res_mbbrc_mb_qp_data_surface,
953                                      width, height,
954                                      width,
955                                      "mbbrc mb qp buffer");
956         if (!allocate_flag)
957             goto failed_allocation;
958
959         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
960         size = 16 * 52 * 4;
961         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
962                                  &avc_ctx->res_mbbrc_const_data_buffer,
963                                  ALIGN(size,0x1000),
964                                  "mbbrc const data buffer");
965         if (!allocate_flag)
966             goto failed_allocation;
967
968         generic_state->brc_allocated = 1;
969     }
970
971     /*mb qp external*/
972     if(avc_state->mb_qp_data_enable)
973     {
974         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4,64);
975         height= ALIGN(generic_state->downscaled_height_4x_in_mb * 4,8);
976         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
977         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
978                                      &avc_ctx->res_mb_qp_data_surface,
979                                      width, height,
980                                      width,
981                                      "external mb qp buffer");
982         if (!allocate_flag)
983             goto failed_allocation;
984     }
985
986     /*     mbenc related surface. it share most of surface with other kernels     */
987     if(avc_state->arbitrary_num_mbs_in_slice)
988     {
989         width = (generic_state->frame_width_in_mbs + 1) * 64;
990         height= generic_state->frame_height_in_mbs ;
991         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
992         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
993                                      &avc_ctx->res_mbenc_slice_map_surface,
994                                      width, height,
995                                      width,
996                                      "slice map buffer");
997         if (!allocate_flag)
998             goto failed_allocation;
999
1000         /*generate slice map,default one slice per frame.*/
1001     }
1002
1003     /* sfd related surface  */
1004     if(avc_state->sfd_enable)
1005     {
1006         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1007         size = 128;
1008         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1009                                  &avc_ctx->res_sfd_output_buffer,
1010                                  size,
1011                                  "sfd output buffer");
1012         if (!allocate_flag)
1013             goto failed_allocation;
1014
1015         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1016         size = ALIGN(52,64);
1017         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1018                                  &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1019                                  size,
1020                                  "sfd P frame cost table buffer");
1021         if (!allocate_flag)
1022             goto failed_allocation;
1023         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1024         assert(data);
1025         memcpy(data,gen9_avc_sfd_cost_table_p_frame,sizeof(unsigned char) *52);
1026         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1027
1028         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1029         size = ALIGN(52,64);
1030         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1031                                  &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1032                                  size,
1033                                  "sfd B frame cost table buffer");
1034         if (!allocate_flag)
1035             goto failed_allocation;
1036         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1037         assert(data);
1038         memcpy(data,gen9_avc_sfd_cost_table_b_frame,sizeof(unsigned char) *52);
1039         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1040     }
1041
1042     /* wp related surfaces */
1043     if(avc_state->weighted_prediction_supported)
1044     {
1045         for(i = 0; i < 2 ; i++)
1046         {
1047             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1048                 continue;
1049             }
1050
1051             width = generic_state->frame_width_in_pixel;
1052             height= generic_state->frame_height_in_pixel ;
1053             i965_CreateSurfaces(ctx,
1054                                 width,
1055                                 height,
1056                                 VA_RT_FORMAT_YUV420,
1057                                 1,
1058                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1059             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1060
1061             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1062                 goto failed_allocation;
1063             }
1064
1065             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1066                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1067         }
1068         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1069         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0],GPE_RESOURCE_ALIGNMENT);
1070         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1071         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1],GPE_RESOURCE_ALIGNMENT);
1072     }
1073
1074     /* other   */
1075
1076     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1077     size = 4 * 1;
1078     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1079                                  &avc_ctx->res_mad_data_buffer,
1080                                  ALIGN(size,0x1000),
1081                                  "MAD data buffer");
1082     if (!allocate_flag)
1083         goto failed_allocation;
1084
1085     return VA_STATUS_SUCCESS;
1086
1087 failed_allocation:
1088     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1089 }
1090
1091 static void
1092 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1093 {
1094     if(!vme_context)
1095         return;
1096
1097     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1098     VADriverContextP ctx = avc_ctx->ctx;
1099     int i = 0;
1100
1101     /* free all the surface/buffer here*/
1102     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1103     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1104     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1105     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1106     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1107     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1108     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1109     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1110     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1111     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1112     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1113     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1114     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1115     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1116     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1117     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1118     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1119     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1120     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1121     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1122     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1123     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1124     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1125     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1126     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1127
1128     for(i = 0;i < 2 ; i++)
1129     {
1130         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1131             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1132             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1133             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1134         }
1135     }
1136
1137 }
1138
1139 static void
1140 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1141                              struct intel_encoder_context *encoder_context,
1142                              struct i965_gpe_context *gpe_context,
1143                              int media_function,
1144                              struct gpe_media_object_parameter *param)
1145 {
1146     struct i965_driver_data *i965 = i965_driver_data(ctx);
1147     struct i965_gpe_table *gpe = &i965->gpe_table;
1148     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1149     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1150
1151     struct intel_batchbuffer *batch = encoder_context->base.batch;
1152     struct encoder_status_buffer_internal *status_buffer;
1153     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1154
1155     if (!batch)
1156         return;
1157
1158     intel_batchbuffer_start_atomic(batch, 0x1000);
1159     intel_batchbuffer_emit_mi_flush(batch);
1160
1161     status_buffer = &(avc_ctx->status_buffer);
1162     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1163     mi_store_data_imm.bo = status_buffer->bo;
1164     mi_store_data_imm.offset = status_buffer->media_index_offset;
1165     mi_store_data_imm.dw0 = media_function;
1166     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1167
1168     gpe->pipeline_setup(ctx, gpe_context, batch);
1169     gpe->media_object(ctx, gpe_context, batch, param);
1170     gpe->media_state_flush(ctx, gpe_context, batch);
1171
1172     gpe->pipeline_end(ctx, gpe_context, batch);
1173
1174     intel_batchbuffer_end_atomic(batch);
1175
1176     intel_batchbuffer_flush(batch);
1177 }
1178
1179 static void
1180 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1181                                     struct intel_encoder_context *encoder_context,
1182                                     struct i965_gpe_context *gpe_context,
1183                                     int media_function,
1184                                     struct gpe_media_object_walker_parameter *param)
1185 {
1186     struct i965_driver_data *i965 = i965_driver_data(ctx);
1187     struct i965_gpe_table *gpe = &i965->gpe_table;
1188     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1189     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1190
1191     struct intel_batchbuffer *batch = encoder_context->base.batch;
1192     struct encoder_status_buffer_internal *status_buffer;
1193     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1194
1195     if (!batch)
1196         return;
1197
1198     intel_batchbuffer_start_atomic(batch, 0x1000);
1199
1200     intel_batchbuffer_emit_mi_flush(batch);
1201
1202     status_buffer = &(avc_ctx->status_buffer);
1203     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1204     mi_store_data_imm.bo = status_buffer->bo;
1205     mi_store_data_imm.offset = status_buffer->media_index_offset;
1206     mi_store_data_imm.dw0 = media_function;
1207     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1208
1209     gpe->pipeline_setup(ctx, gpe_context, batch);
1210     gpe->media_object_walker(ctx, gpe_context, batch, param);
1211     gpe->media_state_flush(ctx, gpe_context, batch);
1212
1213     gpe->pipeline_end(ctx, gpe_context, batch);
1214
1215     intel_batchbuffer_end_atomic(batch);
1216
1217     intel_batchbuffer_flush(batch);
1218 }
1219
1220 static void
1221 gen9_init_gpe_context_avc(VADriverContextP ctx,
1222                           struct i965_gpe_context *gpe_context,
1223                           struct encoder_kernel_parameter *kernel_param)
1224 {
1225     struct i965_driver_data *i965 = i965_driver_data(ctx);
1226
1227     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1228
1229     gpe_context->sampler.entry_size = 0;
1230     gpe_context->sampler.max_entries = 0;
1231
1232     if (kernel_param->sampler_size) {
1233         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1234         gpe_context->sampler.max_entries = 1;
1235     }
1236
1237     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1238     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1239
1240     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1241     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1242     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1243     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1244
1245     if (i965->intel.eu_total > 0)
1246         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1247     else
1248         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1249
1250     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1251     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1252     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1253                                               gpe_context->vfe_state.curbe_allocation_size -
1254                                               ((gpe_context->idrt.entry_size >> 5) *
1255                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1256     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1257     gpe_context->vfe_state.gpgpu_mode = 0;
1258 }
1259
1260 static void
1261 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1262                              struct encoder_scoreboard_parameter *scoreboard_param)
1263 {
1264     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1265     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1266     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1267
1268     if (scoreboard_param->walkpat_flag) {
1269         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1270         gpe_context->vfe_desc5.scoreboard0.type = 1;
1271
1272         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1273         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1274
1275         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1276         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1277
1278         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1279         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1280
1281         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1282         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1283     } else {
1284         // Scoreboard 0
1285         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1286         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1287
1288         // Scoreboard 1
1289         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1290         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1291
1292         // Scoreboard 2
1293         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1294         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1295
1296         // Scoreboard 3
1297         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1298         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1299
1300         // Scoreboard 4
1301         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1302         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1303
1304         // Scoreboard 5
1305         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1306         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1307
1308         // Scoreboard 6
1309         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1310         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1311
1312         // Scoreboard 7
1313         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1314         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1315     }
1316 }
1317 /*
1318 VME pipeline related function
1319 */
1320
1321 /*
1322 scaling kernel related function
1323 */
1324 static void
1325 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1326                            struct encode_state *encode_state,
1327                            struct i965_gpe_context *gpe_context,
1328                            struct intel_encoder_context *encoder_context,
1329                            void *param)
1330 {
1331     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1332     struct scaling_param *surface_param = (struct scaling_param *)param;
1333
1334     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1335
1336     if (!curbe_cmd)
1337         return;
1338
1339     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1340
1341     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1342     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1343
1344     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1345     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1346
1347
1348     curbe_cmd->dw5.flatness_threshold = 128;
1349     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1350     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1351     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1352
1353     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1354         curbe_cmd->dw7.enable_mb_variance_output ||
1355         curbe_cmd->dw8.enable_mb_pixel_average_output)
1356     {
1357         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1358     }
1359
1360     i965_gpe_context_unmap_curbe(gpe_context);
1361     return;
1362 }
1363
1364 static void
1365 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1366                            struct encode_state *encode_state,
1367                            struct i965_gpe_context *gpe_context,
1368                            struct intel_encoder_context *encoder_context,
1369                            void *param)
1370 {
1371     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1372     struct scaling_param *surface_param = (struct scaling_param *)param;
1373
1374     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1375
1376     if (!curbe_cmd)
1377         return;
1378
1379     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1380
1381     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1382     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1383
1384     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1385     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1386
1387     i965_gpe_context_unmap_curbe(gpe_context);
1388     return;
1389 }
1390
1391 static void
1392 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1393                               struct encode_state *encode_state,
1394                               struct i965_gpe_context *gpe_context,
1395                               struct intel_encoder_context *encoder_context,
1396                               void *param)
1397 {
1398     struct scaling_param *surface_param = (struct scaling_param *)param;
1399     unsigned int surface_format;
1400     unsigned int res_size;
1401
1402     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1403         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1404     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1405         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1406     else
1407         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1408
1409     gen9_add_2d_gpe_surface(ctx, gpe_context,
1410                             surface_param->input_surface,
1411                             0, 1, surface_format,
1412                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1413
1414     gen9_add_2d_gpe_surface(ctx, gpe_context,
1415                             surface_param->output_surface,
1416                             0, 1, surface_format,
1417                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1418
1419     /*add buffer mv_proc_stat, here need change*/
1420     if (surface_param->mbv_proc_stat_enabled)
1421     {
1422         res_size = 16 * (surface_param->input_frame_width/16) * (surface_param->input_frame_height/16) * sizeof(unsigned int);
1423
1424         gen9_add_buffer_gpe_surface(ctx,
1425                                     gpe_context,
1426                                     surface_param->pres_mbv_proc_stat_buffer,
1427                                     0,
1428                                     res_size/4,
1429                                     0,
1430                                     GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1431     }else if(surface_param->enable_mb_flatness_check)
1432     {
1433         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1434                                        surface_param->pres_flatness_check_surface,
1435                                        1,
1436                                        I965_SURFACEFORMAT_R8_UNORM,
1437                                        GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1438     }
1439
1440     return;
1441 }
1442
1443 static VAStatus
1444 gen9_avc_kernel_scaling(VADriverContextP ctx,
1445                         struct encode_state *encode_state,
1446                         struct intel_encoder_context *encoder_context,
1447                         int hme_type)
1448 {
1449     struct i965_driver_data *i965 = i965_driver_data(ctx);
1450     struct i965_gpe_table *gpe = &i965->gpe_table;
1451     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1452     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
1453     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
1454     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
1455     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
1456
1457     struct i965_gpe_context *gpe_context;
1458     struct scaling_param surface_param;
1459     struct object_surface *obj_surface;
1460     struct gen9_surface_avc *avc_priv_surface;
1461     struct gpe_media_object_walker_parameter media_object_walker_param;
1462     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1463     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1464     int media_function = 0;
1465     int kernel_idx = 0;
1466
1467     obj_surface = encode_state->reconstructed_object;
1468     avc_priv_surface = obj_surface->private_data;
1469
1470     memset(&surface_param,0,sizeof(struct scaling_param));
1471     switch(hme_type)
1472     {
1473     case INTEL_ENC_HME_4x :
1474         {
1475             media_function = INTEL_MEDIA_STATE_4X_SCALING;
1476             kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1477             downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1478             downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1479
1480             surface_param.input_surface = encode_state->input_yuv_object ;
1481             surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1482             surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1483
1484             surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1485             surface_param.output_frame_width = generic_state->frame_width_4x ;
1486             surface_param.output_frame_height = generic_state->frame_height_4x ;
1487
1488             surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1489             surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1490             surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1491
1492             surface_param.blk8x8_stat_enabled = 0 ;
1493             surface_param.use_4x_scaling  = 1 ;
1494             surface_param.use_16x_scaling = 0 ;
1495             surface_param.use_32x_scaling = 0 ;
1496             break;
1497         }
1498     case INTEL_ENC_HME_16x :
1499         {
1500             media_function = INTEL_MEDIA_STATE_16X_SCALING;
1501             kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1502             downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1503             downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1504
1505             surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1506             surface_param.input_frame_width = generic_state->frame_width_4x ;
1507             surface_param.input_frame_height = generic_state->frame_height_4x ;
1508
1509             surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1510             surface_param.output_frame_width = generic_state->frame_width_16x ;
1511             surface_param.output_frame_height = generic_state->frame_height_16x ;
1512
1513             surface_param.enable_mb_flatness_check = 0 ;
1514             surface_param.enable_mb_variance_output = 0 ;
1515             surface_param.enable_mb_pixel_average_output = 0 ;
1516
1517             surface_param.blk8x8_stat_enabled = 0 ;
1518             surface_param.use_4x_scaling  = 0 ;
1519             surface_param.use_16x_scaling = 1 ;
1520             surface_param.use_32x_scaling = 0 ;
1521
1522             break;
1523         }
1524     case INTEL_ENC_HME_32x :
1525         {
1526             media_function = INTEL_MEDIA_STATE_32X_SCALING;
1527             kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1528             downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1529             downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1530
1531             surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1532             surface_param.input_frame_width = generic_state->frame_width_16x ;
1533             surface_param.input_frame_height = generic_state->frame_height_16x ;
1534
1535             surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1536             surface_param.output_frame_width = generic_state->frame_width_32x ;
1537             surface_param.output_frame_height = generic_state->frame_height_32x ;
1538
1539             surface_param.enable_mb_flatness_check = 0 ;
1540             surface_param.enable_mb_variance_output = 0 ;
1541             surface_param.enable_mb_pixel_average_output = 0 ;
1542
1543             surface_param.blk8x8_stat_enabled = 0 ;
1544             surface_param.use_4x_scaling  = 0 ;
1545             surface_param.use_16x_scaling = 0 ;
1546             surface_param.use_32x_scaling = 1 ;
1547             break;
1548         }
1549     default :
1550         assert(0);
1551
1552     }
1553
1554     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1555
1556     gpe->context_init(ctx, gpe_context);
1557     gpe->reset_binding_table(ctx, gpe_context);
1558
1559     if(surface_param.use_32x_scaling)
1560     {
1561         generic_ctx->pfn_set_curbe_scaling2x(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1562     }else
1563     {
1564         generic_ctx->pfn_set_curbe_scaling4x(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1565     }
1566
1567     if(surface_param.use_32x_scaling)
1568     {
1569         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1570         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1571     }else
1572     {
1573         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1574         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1575     }
1576
1577     if(surface_param.use_4x_scaling)
1578     {
1579         if(avc_state->mb_status_supported)
1580         {
1581             surface_param.enable_mb_flatness_check = 0;
1582             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling)?(avc_state->mb_status_enable || avc_state->flatness_check_enable):0 ;
1583             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1584
1585         }else
1586         {
1587             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling)?avc_state->flatness_check_enable:0;
1588             surface_param.mbv_proc_stat_enabled = 0 ;
1589             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1590         }
1591     }
1592
1593     generic_ctx->pfn_send_scaling_surface(ctx,encode_state,gpe_context,encoder_context,&surface_param);
1594
1595     /* setup the interface data */
1596     gpe->setup_interface_data(ctx, gpe_context);
1597
1598     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1599     if(surface_param.use_32x_scaling)
1600     {
1601         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1602         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1603     }else
1604     {
1605         /* the scaling is based on 8x8 blk level */
1606         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1607         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1608     }
1609     kernel_walker_param.no_dependency = 1;
1610
1611     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1612
1613     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1614                                         gpe_context,
1615                                         media_function,
1616                                         &media_object_walker_param);
1617
1618     return VA_STATUS_SUCCESS;
1619 }
1620
1621 /*
1622 frame/mb brc related function
1623 */
/*
 * Fill a GEN9 MFX_AVC_IMG_STATE command structure with the baseline values
 * for the current frame.  The result is later copied (and per-pass tweaked)
 * into the image-state batch buffer by gen9_avc_set_image_state() /
 * gen9_avc_set_image_state_non_brc().  Only frame (non-field, non-MBAFF
 * image_structure) encoding is programmed here.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header.  dword_length excludes the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 -2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: picture-level flags straight from the VA picture parameters. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: sequence/picture coding tools and PAK data-format flags. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: per-pass size-check controls; callers override these per PAK pass. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) is only usable with CABAC entropy coding. */
    if(pstate->dw4.entropy_coding_flag && (avc_state->tq_enable))
    {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    }else
    {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB bit-size ceilings used by the max-bit checks above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-pass slice delta-QP bounds, unused (zero) here. */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max is left at the field's maximum. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
1716
1717 void gen9_avc_set_image_state(VADriverContextP ctx,
1718                               struct encode_state *encode_state,
1719                               struct intel_encoder_context *encoder_context,
1720                               struct i965_gpe_resource *gpe_resource)
1721 {
1722     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1723     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
1724     char *pdata;
1725     int i;
1726     unsigned int * data;
1727     struct gen9_mfx_avc_img_state cmd;
1728
1729     pdata = i965_map_gpe_resource(gpe_resource);
1730
1731     if (!pdata)
1732         return;
1733
1734     gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd);
1735     for(i = 0; i < generic_state->num_pak_passes;i++)
1736     {
1737
1738         if(i == 0)
1739         {
1740             cmd.dw4.macroblock_stat_enable = 0;
1741             cmd.dw5.non_first_pass_flag = 0;
1742         }else
1743         {
1744             cmd.dw4.macroblock_stat_enable = 1;
1745             cmd.dw5.non_first_pass_flag = 1;
1746             cmd.dw5.intra_mb_ipcm_flag = 1;
1747
1748         }
1749          cmd.dw5.mb_rate_ctrl_flag = 0;
1750          memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state));
1751          data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1752         *data = MI_BATCH_BUFFER_END;
1753
1754          pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1755     }
1756     i965_unmap_gpe_resource(gpe_resource);
1757     return;
1758 }
1759
/*
 * Non-BRC variant of gen9_avc_set_image_state(): emits a single
 * MFX_AVC_IMG_STATE command (plus MI_BATCH_BUFFER_END) for the current
 * PAK pass only, instead of one copy per pass.
 */
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    char *pdata;

    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx,encode_state,encoder_context,&cmd);

    if(generic_state->curr_pak_pass == 0)
    {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;

    }
    else
    {
        /* NOTE(review): non_first_pass_flag stays 0 here, while the BRC
         * variant gen9_avc_set_image_state() sets it to 1 for passes > 0.
         * Unclear whether this asymmetry is intentional — confirm against
         * the non-BRC multi-pass design before changing. */
        cmd.dw4.macroblock_stat_enable = 1;
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    memcpy(pdata,&cmd,sizeof(struct gen9_mfx_avc_img_state));
    /* Terminate the command so it can run as a tiny second-level batch. */
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
    return;
}
1800
/*
 * Populate the BRC constant-data buffer consumed by the GEN9 BRC kernels.
 * The buffer is a sequence of fixed-size tables laid out back to back;
 * `data` is advanced by each table's byte size in turn:
 *   1. QP adjustment / distortion threshold / max-frame threshold /
 *      distortion-QP adjustment tables (one combined blob)
 *   2. 128 B skip-threshold table (slice-type dependent)
 *   3. 256 B (32+32+32+160) QP-of-reference-list area
 *   4. 1664 B MV cost + mode cost table
 *   5. 128 B ref-cost table
 *   6. 64 B intra scaling-factor table
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data =NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B-specific column of the constant tables. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data,gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Override the per-QP non-FTQ skip values (odd bytes of each 2-byte
     * entry, one entry per QP 0..51) with the app-supplied LUT. */
    if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i< 52 ; i++)
        {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 B list0 | 32 B (unused) | 32 B list1 | 160 B padding.
     * Unused list slots stay 0xff. */
    size = 32 + 32 +32 +160;
    memset(data,0xff,32);
    memset(data+32+32,0xff,32);
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        {
            for(i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList0[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[0][i];//?
            }
        }
        break;
    case SLICE_TYPE_B:
        {
            /* list1 entries live at byte offset 64 within this area. */
            data = data + 32 + 32;
            for(i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList1[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[1][i];//?
            }

            data = data - 32 - 32;

            for(i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
            {
               surface_id = slice_param->RefPicList0[i].picture_id;
               obj_surface = SURFACE(surface_id);
               if (!obj_surface)
                   break;
               *(data + i) = avc_state->list_ref_idx[0][i];//?
            }
        }
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data,(unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char));

    /* Optionally patch byte 3 of each 32-byte (two 16-byte halves) per-QP
     * entry with the legacy intra mode cost. */
    if(avc_state->old_mode_cost_enable)
    {   data_tmp = data;
        for(i = 0; i < 52 ; i++)
        {
            *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Optionally patch the FTQ skip thresholds (bytes 24,25,27..31 of each
     * 32-byte per-QP entry) with the app-supplied LUT. */
    if(avc_state->ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i < 52 ; i++)
        {
            *(data + (i * 32) + 24) =
            *(data + (i * 32) + 25) =
            *(data + (i * 32) + 27) =
            *(data + (i * 32) + 28) =
            *(data + (i * 32) + 29) =
            *(data + (i * 32) + 30) =
            *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if(avc_state->adaptive_intra_scaling_enable)
    {
        memcpy(data,(unsigned char *)&gen9_avc_adaptive_intra_scaling_factor,size * sizeof(unsigned char));
    }else
    {
        memcpy(data,(unsigned char *)&gen9_avc_intra_scaling_factor,size * sizeof(unsigned char));
    }
    i965_unmap_gpe_resource(gpe_resource);
}
1959
/*
 * Legacy (gen75-table) variant of gen9_avc_init_brc_const_data(): fills the
 * BRC constant buffer with the older QP-adjustment and mode/MV cost tables
 * and skips the reference-list QP area.
 *
 * NOTE(review): unlike the byte-pointer variant above, `data` here is
 * `unsigned int *`, so every `data += size` advances size*4 BYTES and the
 * LUT stores (`*(data + 1 + i*2)`, `*(data + i*32 + 24)` ...) write whole
 * dwords at dword-scaled offsets.  The table sizes, however, are given in
 * bytes (memcpy).  This unit mismatch looks suspicious — confirm the
 * intended layout against the gen75 BRC kernel before relying on or
 * changing this path.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data =NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B-specific column of the constant tables. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data,gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb,size*sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_P:
        memcpy(data,gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data,gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag],size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Override the per-QP non-FTQ skip values with the app-supplied LUT. */
    if((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i< 52 ; i++)
        {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Reference-list QP area is left zeroed in this legacy path. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data,(unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0],size * sizeof(unsigned char));

    /* Optionally patch each per-QP entry with the legacy intra mode cost. */
    if(avc_state->old_mode_cost_enable)
    {   data_tmp = data;
        for(i = 0; i < 52 ; i++)
        {
            *(data_tmp +3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Optionally patch the FTQ skip thresholds with the app-supplied LUT. */
    if(avc_state->ftq_skip_threshold_lut_input_enable)
    {
        for(i = 0; i < 52 ; i++)
        {
            *(data + (i * 32) + 24) =
            *(data + (i * 32) + 25) =
            *(data + (i * 32) + 27) =
            *(data + (i * 32) + 28) =
            *(data + (i * 32) + 29) =
            *(data + (i * 32) + 30) =
            *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data,(unsigned char *)&gen9_avc_ref_cost[table_idx][0],size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2061 static void
2062 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2063                                   struct encode_state *encode_state,
2064                                   struct i965_gpe_context *gpe_context,
2065                                   struct intel_encoder_context *encoder_context,
2066                                   void * param)
2067 {
2068     gen9_avc_brc_init_reset_curbe_data *cmd;
2069     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2070     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2071     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
2072     double input_bits_per_frame = 0;
2073     double bps_ratio = 0;
2074     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2075     struct avc_param common_param;
2076
2077     cmd = i965_gpe_context_map_curbe(gpe_context);
2078
2079     if (!cmd)
2080         return;
2081
2082     memcpy(cmd,&gen9_avc_brc_init_reset_curbe_init_data,sizeof(gen9_avc_brc_init_reset_curbe_data));
2083
2084     memset(&common_param,0,sizeof(common_param));
2085     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2086     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2087     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2088     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2089     common_param.frames_per_100s = generic_state->frames_per_100s;
2090     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2091     common_param.target_bit_rate = generic_state->target_bit_rate;
2092
2093     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param,seq_param->level_idc);
2094     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2095     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2096     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2097     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2098     cmd->dw8.gop_p = (generic_state->gop_ref_distance)?((generic_state->gop_size -1)/generic_state->gop_ref_distance):0;
2099     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2100     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2101     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2102     cmd->dw12.no_slices = avc_state->slice_num;
2103
2104     //VUI
2105     if(seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR )
2106     {
2107         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2108         if(generic_state->internal_rate_mode == VA_RC_CBR)
2109         {
2110             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2111
2112         }
2113
2114     }
2115     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2116     cmd->dw7.frame_rate_d = 100;
2117     cmd->dw8.brc_flag = 0;
2118     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled)? 0 : 0x8000;
2119
2120
2121     if(generic_state->internal_rate_mode == VA_RC_CBR)
2122     { //CBR
2123         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2124         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISCBR;
2125
2126     }else if(generic_state->internal_rate_mode == VA_RC_VBR)
2127     {//VBR
2128         if(cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate)
2129         {
2130             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2131         }
2132         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISVBR;
2133
2134     }else if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
2135     { //AVBR
2136         cmd->dw4.max_bit_rate =cmd->dw3.average_bit_rate;
2137         cmd->dw8.brc_flag = cmd->dw8.brc_flag |INTEL_ENCODE_BRCINIT_ISAVBR;
2138
2139     }
2140     //igonre icq/vcm/qvbr
2141
2142     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2143     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2144
2145     //frame bits
2146     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d)/(double)(cmd->dw6.frame_rate_m);;
2147
2148     if(cmd->dw2.buf_size_in_bits == 0)
2149     {
2150        cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2151     }
2152
2153     if(cmd->dw1.init_buf_full_in_bits == 0)
2154     {
2155        cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7/8;
2156     }
2157     if(cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2))
2158     {
2159        cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2160     }
2161     if(cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits)
2162     {
2163        cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2164     }
2165
2166     //AVBR
2167     if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
2168     {
2169        cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2170        cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits/4);
2171
2172     }
2173
2174     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits/30.0);
2175     bps_ratio = (bps_ratio < 0.1)? 0.1:(bps_ratio > 3.5)?3.5:bps_ratio;
2176
2177
2178     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90,bps_ratio));
2179     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66,bps_ratio));
2180     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46,bps_ratio));
2181     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2182     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2183     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2184     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2185     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2186     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2187     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2188     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2189     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2190     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2191     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2192     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75,bps_ratio));
2193     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2194     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2195     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2196     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34,bps_ratio));
2197     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2198     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2199     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2200     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2201     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2202
2203     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2204
2205     i965_gpe_context_unmap_curbe(gpe_context);
2206
2207     return;
2208 }
2209
2210 static void
2211 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2212                                      struct encode_state *encode_state,
2213                                      struct i965_gpe_context *gpe_context,
2214                                      struct intel_encoder_context *encoder_context,
2215                                      void * param_mbenc)
2216 {
2217     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2218     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2219
2220     gen9_add_buffer_gpe_surface(ctx,
2221                                 gpe_context,
2222                                 &avc_ctx->res_brc_history_buffer,
2223                                 0,
2224                                 avc_ctx->res_brc_history_buffer.size,
2225                                 0,
2226                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2227
2228     gen9_add_buffer_2d_gpe_surface(ctx,
2229                                    gpe_context,
2230                                    &avc_ctx->res_brc_dist_data_surface,
2231                                    1,
2232                                    I965_SURFACEFORMAT_R8_UNORM,
2233                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2234
2235     return;
2236 }
2237
2238 static VAStatus
2239 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2240                                struct encode_state *encode_state,
2241                                struct intel_encoder_context *encoder_context)
2242 {
2243     struct i965_driver_data *i965 = i965_driver_data(ctx);
2244     struct i965_gpe_table *gpe = &i965->gpe_table;
2245     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2246     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2247     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2248     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
2249
2250     struct i965_gpe_context *gpe_context;
2251     struct gpe_media_object_parameter media_object_param;
2252     struct gpe_media_object_inline_data media_object_inline_data;
2253     int media_function = 0;
2254     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2255
2256     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2257
2258     if(generic_state->brc_inited)
2259         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2260
2261     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2262
2263     gpe->context_init(ctx, gpe_context);
2264     gpe->reset_binding_table(ctx, gpe_context);
2265
2266     generic_ctx->pfn_set_curbe_brc_init_reset(ctx,encode_state,gpe_context,encoder_context,NULL);
2267
2268     generic_ctx->pfn_send_brc_init_reset_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
2269
2270     gpe->setup_interface_data(ctx, gpe_context);
2271
2272     memset(&media_object_param, 0, sizeof(media_object_param));
2273     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2274     media_object_param.pinline_data = &media_object_inline_data;
2275     media_object_param.inline_size = sizeof(media_object_inline_data);
2276
2277     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2278                                         gpe_context,
2279                                         media_function,
2280                                         &media_object_param);
2281
2282     return VA_STATUS_SUCCESS;
2283 }
2284
/*
 * Fill the CURBE for the BRC frame-update kernel.
 *
 * Besides programming the kernel constants, this routine also advances the
 * software-side HRD accounting in generic_state
 * (brc_init_current_target_buf_full_in_bits), so it has side effects beyond
 * the mapped CURBE buffer.  Returns silently if the reconstructed surface or
 * the CURBE mapping is unavailable.
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Start from the static init table, then override per-frame fields. */
    memcpy(cmd,&gen9_avc_frame_brc_update_curbe_init_data,sizeof(gen9_avc_frame_brc_update_curbe_data));

    /* Buffer-fullness overflow: wrap the accumulator and tell the kernel. */
    cmd->dw5.target_size_flag = 0 ;
    if(generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits)
    {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Skipped frames still consume their nominal per-frame bit budget. */
    if(generic_state->skip_frame_enbale)
    {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* herder_bytes_inserted: presumably "header bytes inserted" (field name
     * typo upstream); converted from bytes to bits here. */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref)?INTEL_ENCODE_BRCUPDATE_IS_REFERENCE:0 ;

    if(avc_state->multi_pre_enable)
    {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-slice-type QP clamp; 0/0 disables the clamp in the kernel. */
    if(avc_state->min_max_qp_enable)
    {
        switch(generic_state->frame_type)
        {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    }else
    {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Account the current frame's nominal bit budget for the next update. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR: growth-adjust frame thresholds scaled by convergence, and rate
     * ratio thresholds widened/narrowed by the accuracy setting. */
    if(generic_state->internal_rate_mode == INTEL_BRC_AVBR)
    {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30)*(100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30)*(160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    /* Derive the profile/level max frame size from the common parameters. */
    memset(&common_param,0,sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param,seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2401
2402 static void
2403 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2404                                        struct encode_state *encode_state,
2405                                        struct i965_gpe_context *gpe_context,
2406                                        struct intel_encoder_context *encoder_context,
2407                                        void * param_brc)
2408 {
2409     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2410     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2411     struct brc_param * param = (struct brc_param *)param_brc ;
2412     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2413
2414
2415     /* brc history buffer*/
2416     gen9_add_buffer_gpe_surface(ctx,
2417                                 gpe_context,
2418                                 &avc_ctx->res_brc_history_buffer,
2419                                 0,
2420                                 avc_ctx->res_brc_history_buffer.size,
2421                                 0,
2422                                 GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX);
2423
2424     /* previous pak buffer*/
2425     gen9_add_buffer_gpe_surface(ctx,
2426                                 gpe_context,
2427                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2428                                 0,
2429                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2430                                 0,
2431                                 GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX);
2432
2433     /* image state command buffer read only*/
2434     gen9_add_buffer_gpe_surface(ctx,
2435                                 gpe_context,
2436                                 &avc_ctx->res_brc_image_state_read_buffer,
2437                                 0,
2438                                 avc_ctx->res_brc_image_state_read_buffer.size,
2439                                 0,
2440                                 GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX);
2441
2442     /* image state command buffer write only*/
2443     gen9_add_buffer_gpe_surface(ctx,
2444                                 gpe_context,
2445                                 &avc_ctx->res_brc_image_state_write_buffer,
2446                                 0,
2447                                 avc_ctx->res_brc_image_state_write_buffer.size,
2448                                 0,
2449                                 GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX);
2450
2451     /*  Mbenc curbe input buffer */
2452     gen9_add_dri_buffer_gpe_surface(ctx,
2453                                     gpe_context,
2454                                     gpe_context_mbenc->dynamic_state.bo,
2455                                     0,
2456                                     ALIGN(gpe_context_mbenc->curbe.length, 64),
2457                                     gpe_context_mbenc->curbe.offset,
2458                                     GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2459     /* Mbenc curbe output buffer */
2460     gen9_add_dri_buffer_gpe_surface(ctx,
2461                                     gpe_context,
2462                                     gpe_context_mbenc->dynamic_state.bo,
2463                                     0,
2464                                     ALIGN(gpe_context_mbenc->curbe.length, 64),
2465                                     gpe_context_mbenc->curbe.offset,
2466                                     GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2467
2468     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2469     gen9_add_buffer_2d_gpe_surface(ctx,
2470                                    gpe_context,
2471                                    &avc_ctx->res_brc_dist_data_surface,
2472                                    1,
2473                                    I965_SURFACEFORMAT_R8_UNORM,
2474                                    GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX);
2475
2476     /* BRC const data 2D surface buffer */
2477     gen9_add_buffer_2d_gpe_surface(ctx,
2478                                    gpe_context,
2479                                    &avc_ctx->res_brc_const_data_buffer,
2480                                    1,
2481                                    I965_SURFACEFORMAT_R8_UNORM,
2482                                    GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX);
2483
2484     /* MB statistical data surface*/
2485     gen9_add_buffer_gpe_surface(ctx,
2486                                 gpe_context,
2487                                 &avc_ctx->res_mb_status_buffer,
2488                                 0,
2489                                 avc_ctx->res_mb_status_buffer.size,
2490                                 0,
2491                                 GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2492
2493     return;
2494 }
2495
/*
 * Run the BRC frame-update kernel.
 *
 * Order is significant: the MBEnc CURBE is programmed first, into the
 * per-frame-type MBEnc GPE context, because the frame-update kernel reads
 * and patches that CURBE through the read/write surfaces bound in
 * gen9_avc_send_surface_brc_frame_update().  Only then is the BRC update
 * context itself initialized and launched as a single media object.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use,mb_qp_buffer_in_use;
    /* NOTE(review): brc_enabled is always 0 here, so the MBEnc CURBE is
     * built with BRC flagged off — confirm this is intended. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0)?1:0;
    /* The trailing "&& (0)" deliberately disables dirty-ROI handling. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Pick the MBEnc kernel family (normal/performance/quality), then offset
     * by frame type below: base index is the I kernel, +1 = P, +2 = B. */
    switch(generic_state->kernel_mode)
    {
    case INTEL_ENC_KERNEL_NORMAL :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
    case INTEL_ENC_KERNEL_PERFORMANCE :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
    case INTEL_ENC_KERNEL_QUALITY :
        {
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
    default:
        assert(0);

    }

    if(generic_state->frame_type == SLICE_TYPE_P)
    {
        kernel_idx += 1;
    }
    else if(generic_state->frame_type == SLICE_TYPE_B)
    {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param,0,sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx,encode_state,gpe_context,encoder_context,&curbe_mbenc_param);

    /* begin brc frame update: switch gpe_context over to the BRC frame
     * update kernel while remembering the MBEnc context in the brc param. */
    memset(&curbe_brc_param,0,sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx,encode_state,gpe_context,encoder_context,&curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if(avc_state->multi_pre_enable)
    {
        gen9_avc_init_brc_const_data(ctx,encode_state,encoder_context);
    }else
    {
        gen9_avc_init_brc_const_data_old(ctx,encode_state,encoder_context);
    }
    /* build the MFX image state template the kernel reads */
    gen9_avc_set_image_state(ctx,encode_state,encoder_context,&(avc_ctx->res_brc_image_state_read_buffer));
    /* bind all frame-update surfaces (history, PAK stats, CURBEs, ...) */
    generic_ctx->pfn_send_brc_frame_update_surface(ctx,encode_state,gpe_context,encoder_context,&curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* single media object, empty inline data */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                        gpe_context,
                                        media_function,
                                        &media_object_param);

    return VA_STATUS_SUCCESS;
}
2623
2624 static void
2625 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2626                                  struct encode_state *encode_state,
2627                                  struct i965_gpe_context *gpe_context,
2628                                  struct intel_encoder_context *encoder_context,
2629                                  void * param)
2630 {
2631     gen9_avc_mb_brc_curbe_data *cmd;
2632     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2633     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2634
2635     cmd = i965_gpe_context_map_curbe(gpe_context);
2636
2637     if (!cmd)
2638         return;
2639
2640     memset(cmd,0,sizeof(gen9_avc_mb_brc_curbe_data));
2641
2642     cmd->dw0.cur_frame_type = generic_state->frame_type;
2643     if(generic_state->brc_roi_enable)
2644     {
2645         cmd->dw0.enable_roi = 1;
2646     }else
2647     {
2648         cmd->dw0.enable_roi = 0;
2649     }
2650
2651     i965_gpe_context_unmap_curbe(gpe_context);
2652
2653     return;
2654 }
2655
2656 static void
2657 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2658                                     struct encode_state *encode_state,
2659                                     struct i965_gpe_context *gpe_context,
2660                                     struct intel_encoder_context *encoder_context,
2661                                     void * param_mbenc)
2662 {
2663     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2664     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2665     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2666
2667     /* brc history buffer*/
2668     gen9_add_buffer_gpe_surface(ctx,
2669                                 gpe_context,
2670                                 &avc_ctx->res_brc_history_buffer,
2671                                 0,
2672                                 avc_ctx->res_brc_history_buffer.size,
2673                                 0,
2674                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
2675
2676     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2677     if(generic_state->mb_brc_enabled)
2678     {
2679         gen9_add_buffer_2d_gpe_surface(ctx,
2680                                        gpe_context,
2681                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2682                                        1,
2683                                        I965_SURFACEFORMAT_R8_UNORM,
2684                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2685
2686     }
2687
2688     /* BRC roi feature*/
2689     if(generic_state->brc_roi_enable)
2690     {
2691         gen9_add_buffer_gpe_surface(ctx,
2692                                     gpe_context,
2693                                     &avc_ctx->res_mbbrc_roi_surface,
2694                                     0,
2695                                     avc_ctx->res_mbbrc_roi_surface.size,
2696                                     0,
2697                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2698
2699     }
2700
2701     /* MB statistical data surface*/
2702     gen9_add_buffer_gpe_surface(ctx,
2703                                 gpe_context,
2704                                 &avc_ctx->res_mb_status_buffer,
2705                                 0,
2706                                 avc_ctx->res_mb_status_buffer.size,
2707                                 0,
2708                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
2709
2710     return;
2711 }
2712
2713 static VAStatus
2714 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2715                               struct encode_state *encode_state,
2716                               struct intel_encoder_context *encoder_context)
2717
2718 {
2719     struct i965_driver_data *i965 = i965_driver_data(ctx);
2720     struct i965_gpe_table *gpe = &i965->gpe_table;
2721     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2722     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
2723     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
2724     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
2725
2726     struct i965_gpe_context *gpe_context;
2727     struct gpe_media_object_walker_parameter media_object_walker_param;
2728     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2729     int media_function = 0;
2730     int kernel_idx = 0;
2731
2732     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2733     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2734     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2735
2736     gpe->context_init(ctx, gpe_context);
2737     gpe->reset_binding_table(ctx, gpe_context);
2738
2739     /* set curbe brc mb update*/
2740     generic_ctx->pfn_set_curbe_brc_mb_update(ctx,encode_state,gpe_context,encoder_context,NULL);
2741
2742
2743     /* set surface brc mb update*/
2744     generic_ctx->pfn_send_brc_mb_update_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
2745
2746
2747     gpe->setup_interface_data(ctx, gpe_context);
2748
2749     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2750     /* the scaling is based on 8x8 blk level */
2751     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1)/2;
2752     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1)/2 ;
2753     kernel_walker_param.no_dependency = 1;
2754
2755     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2756
2757     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2758                                         gpe_context,
2759                                         media_function,
2760                                         &media_object_walker_param);
2761
2762     return VA_STATUS_SUCCESS;
2763 }
2764
/*
 * MBEnc kernel related functions, including the intra distortion kernel.
 */
2768 static int
2769 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2770 {
2771     int biweight = 32;      // default value
2772
2773     /* based on kernel HLD*/
2774     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT)
2775     {
2776         biweight = 32;
2777     }
2778     else
2779     {
2780         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2781
2782         if (biweight != 16 && biweight != 21 &&
2783             biweight != 32 && biweight != 43 && biweight != 48)
2784         {
2785             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
2786         }
2787     }
2788
2789     return biweight;
2790 }
2791
2792 static void
2793 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2794                                struct encode_state *encode_state,
2795                                struct intel_encoder_context *encoder_context)
2796 {
2797     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2798     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
2799     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2800     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
2801
2802     int max_num_references;
2803     VAPictureH264 *curr_pic;
2804     VAPictureH264 *ref_pic_l0;
2805     VAPictureH264 *ref_pic_l1;
2806     int i = 0;
2807     int tb = 0;
2808     int td = 0;
2809     int tx = 0;
2810     int tmp = 0;
2811     int poc0 = 0;
2812     int poc1 = 0;
2813
2814     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2815
2816     memset(avc_state->dist_scale_factor_list0,0,32*sizeof(unsigned int));
2817     curr_pic = &pic_param->CurrPic;
2818     for(i = 0; i < max_num_references; i++)
2819     {
2820         ref_pic_l0 = &(slice_param->RefPicList0[i]);
2821
2822         if((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2823            (ref_pic_l0->picture_id == VA_INVALID_SURFACE) )
2824             break;
2825         ref_pic_l1 = &(slice_param->RefPicList1[0]);
2826         if((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2827            (ref_pic_l0->picture_id == VA_INVALID_SURFACE) )
2828             break;
2829
2830         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2831         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2832         CLIP(poc0,-128,127);
2833         CLIP(poc1,-128,127);
2834         tb = poc0;
2835         td = poc1;
2836
2837         if(td == 0)
2838         {
2839             td = 1;
2840         }
2841         tmp = (td/2 > 0)?(td/2):(-(td/2));
2842         tx = (16384 + tmp)/td ;
2843         tmp = (tb*tx+32)>>6;
2844         CLIP(tmp,-1024,1023);
2845         avc_state->dist_scale_factor_list0[i] = tmp;
2846     }
2847     return;
2848 }
2849
2850 static unsigned int
2851 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2852                               VAEncSliceParameterBufferH264 *slice_param,
2853                               int list,
2854                               int ref_frame_idx)
2855 {
2856     struct i965_driver_data *i965 = i965_driver_data(ctx);
2857     struct object_surface *obj_surface;
2858     struct gen9_surface_avc *avc_priv_surface;
2859     VASurfaceID surface_id;
2860
2861     assert(slice_param);
2862     assert(list < 2);
2863
2864     if(list == 0)
2865     {
2866         if(ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2867             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
2868         else
2869             return 0;
2870     }else
2871     {
2872         if(ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2873             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2874         else
2875             return 0;
2876     }
2877     obj_surface = SURFACE(surface_id);
2878     if(obj_surface && obj_surface->private_data)
2879     {
2880         avc_priv_surface = obj_surface->private_data;
2881         return avc_priv_surface->qp_value;
2882     }else
2883     {
2884         return 0;
2885     }
2886 }
2887
/*
 * Populate the MB-BRC constant data buffer.
 *
 * The buffer holds one 16-dword row per QP value (52 QPs, 16*52 dwords).
 * A frame-type-specific base table is copied in first, then individual
 * dword columns are patched according to the enabled features:
 *   dword 0      - old intra mode cost (I frames only)
 *   dword 3      - P-skip MV cost adjustment (P frames only)
 *   dword 9      - skip value (LUT-derived or static P/B table)
 *   dword 10     - intra scaling factor (adaptive or static)
 *   dwords 6,7   - FTQ skip thresholds, replicated into byte lanes
 *   dwords 11,12 - trellis lambda values
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data =NULL;
    unsigned int * data_tmp = NULL;   /* saved base pointer for the second pass */
    unsigned int size = 16 * 52;      /* 16 dwords per QP, 52 QP levels */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* select the base table by slice type, then copy it wholesale */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data,gen9_avc_mb_brc_const_data[table_idx][0],size*sizeof(unsigned int));

    data_tmp = data;

    /* First pass: frame-type-specific patches, one 16-dword row per QP. */
    switch(generic_state->frame_type)
    {
    case SLICE_TYPE_I:
        for(i = 0; i < 52 ; i++)
        {
            if(avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for(i = 0; i < 52 ; i++)
        {
            if(generic_state->frame_type == SLICE_TYPE_P)
            {
                if(avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: skip value — caller-supplied LUT wins over the
             * static per-type tables */
            if(avc_state->non_ftq_skip_threshold_lut_input_enable)
            {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable,transform_8x8_mode_flag,avc_state->non_ftq_skip_threshold_lut[i]);
            }else if(generic_state->frame_type == SLICE_TYPE_P)
            {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            }else
            {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra scaling factor */
            if(avc_state->adaptive_intra_scaling_enable)
            {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            }else
            {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass (all frame types): FTQ thresholds and trellis lambdas. */
    data = data_tmp;
    for(i = 0; i < 52 ; i++)
    {
        if(avc_state->ftq_skip_threshold_lut_input_enable)
        {
            /* replicate the 8-bit threshold into the upper byte lanes */
            *(data + 6) =  (avc_state->ftq_skip_threshold_lut[i] |
                (avc_state->ftq_skip_threshold_lut[i] <<16) |
                (avc_state->ftq_skip_threshold_lut[i] <<24) );
            *(data + 7) =  (avc_state->ftq_skip_threshold_lut[i] |
                (avc_state->ftq_skip_threshold_lut[i] <<8) |
                (avc_state->ftq_skip_threshold_lut[i] <<16) |
                (avc_state->ftq_skip_threshold_lut[i] <<24) );
        }

        if(avc_state->kernel_trellis_enable)
        {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
2990
/*
 * Fill the CURBE (constant URB entry / kernel constant buffer) for the Gen9
 * AVC MbEnc GPU kernel for the current frame.
 *
 * The CURBE is first seeded from a per-slice-type template table, then the
 * individual DW fields are overridden from the current sequence/picture/slice
 * parameters, encoder preset tables and BRC/ROI state.  Statement order and
 * exact values here are a contract with the MbEnc kernel binary — do not
 * reorder without consulting the kernel interface.
 *
 * @param ctx              VA driver context
 * @param encode_state     current encode state (unused directly here)
 * @param gpe_context      GPE context whose CURBE buffer is mapped and filled
 * @param encoder_context  encoder context holding VME/AVC private state
 * @param param            pointer to a struct mbenc_param describing this run
 *
 * Returns nothing; on an unmappable CURBE or a missing backward reference
 * surface (B frames) it returns early, leaving the CURBE partially written.
 */
static void
gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    gen9_avc_mbenc_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VASurfaceID surface_id;
    struct object_surface *obj_surface;

    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
    unsigned char qp = 0;
    unsigned char me_method = 0;
    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
    unsigned int table_idx = 0;

    unsigned int preset = generic_state->preset;
    /* ME search method and slice QP derived from preset and slice header */
    me_method = (generic_state->frame_type == SLICE_TYPE_B)? gen9_avc_b_me_method[preset]:gen9_avc_p_me_method[preset];
    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

    cmd = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    memset(cmd,0,sizeof(gen9_avc_mbenc_curbe_data));

    /* Seed the CURBE from the per-mode template: the I-frame-distortion
     * variant when running MbEnc as a BRC distortion pass, otherwise the
     * template matching the current slice type. */
    if(mbenc_i_frame_dist_in_use)
    {
        memcpy(cmd,gen9_avc_mbenc_curbe_i_frame_dist_init_data,sizeof(gen9_avc_mbenc_curbe_data));

    }else
    {
        switch(generic_state->frame_type)
        {
        case SLICE_TYPE_I:
            memcpy(cmd,gen9_avc_mbenc_curbe_normal_i_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
            break;
        case SLICE_TYPE_P:
            memcpy(cmd,gen9_avc_mbenc_curbe_normal_p_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
            break;
        case SLICE_TYPE_B:
            memcpy(cmd,gen9_avc_mbenc_curbe_normal_b_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
            break;
        default:
            assert(0);
        }

    }
    /* Preset-driven search controls (DW0/DW37 are the VME primary/secondary
     * search control pair). */
    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw0.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    cmd->dw37.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;

    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
    cmd->dw38.max_len_sp = 0;

    cmd->dw3.src_access = 0;
    cmd->dw3.ref_access = 0;

    /* FTQ (forward-transform quantization based skip) only applies to
     * inter frames; value comes either from an explicit override or from
     * the preset table (bit 0 = P, bit 1 = B). */
    if(avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I))
    {
        if(avc_state->ftq_override)
        {
            cmd->dw3.ftq_enable = avc_state->ftq_enable;

        }else
        {
            if(generic_state->frame_type == SLICE_TYPE_P)
            {
                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;

            }else
            {
                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
            }
        }
    }else
    {
        cmd->dw3.ftq_enable = 0;
    }

    /* 0x7 masks off all sub-8x8 inter partitions */
    if(avc_state->disable_sub_mb_partion)
        cmd->dw3.sub_mb_part_mask = 0x7;

    if(mbenc_i_frame_dist_in_use)
    {
        /* Distortion pass runs on the 4x-downscaled picture */
        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1)/4;
        cmd->dw6.batch_buffer_end = 0;
        cmd->dw31.intra_compute_type = 1;

    }else
    {
        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice)?generic_state->frame_height_in_mbs:avc_state->slice_height;

        /* Mode/MV cost table for this slice type and QP (fills DW8..DW15) */
        {
            memcpy(&(cmd->dw8),gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp],8*sizeof(unsigned int));
            if((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable)
            {
                /* legacy intra mode cost path intentionally disabled: */
                //cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
            }else if(avc_state->skip_bias_adjustment_enable)
            {
                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
                // No need to check for P picture as the flag is only enabled for P picture */
                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];

            }
        }

        /* VME search path (DW16..DW31), B frames use table index 1 */
        table_idx = (generic_state->frame_type == SLICE_TYPE_B)?1:0;
        memcpy(&(cmd->dw16),table_enc_search_path[table_idx][me_method],16*sizeof(unsigned int));
    }
    cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
    cmd->dw4.field_parity_flag = 0;//bottom field
    cmd->dw4.enable_cur_fld_idr = 0;//field related
    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    cmd->dw4.hme_enable = generic_state->hme_enabled;
    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);


    /* with 8x8 transform off, mask out the intra 8x8 partition (bit 1) */
    cmd->dw7.intra_part_mask = pic_param->pic_fields.bits.transform_8x8_mode_flag?0:0x02;
    cmd->dw7.src_field_polarity = 0;//field related

    /*ftq_skip_threshold_lut set,dw14 /15*/

    /*r5 disable NonFTQSkipThresholdLUT*/
    if(generic_state->frame_type == SLICE_TYPE_P)
    {
        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];

    }else if(generic_state->frame_type == SLICE_TYPE_B)
    {
        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];

    }

    cmd->dw13.qp_prime_y = qp;
    cmd->dw13.qp_prime_cb = qp;
    cmd->dw13.qp_prime_cr = qp;
    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable


    /* Multi-reference prediction policy per preset; 128 disables the list,
     * 1 enables it for the matching slice type. */
    if((generic_state->frame_type != SLICE_TYPE_I)&& avc_state->multi_pre_enable)
    {
        switch(gen9_avc_multi_pred[preset])
        {
        case 0:
            cmd->dw32.mult_pred_l0_disable = 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 1:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P)?1:128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 2:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
            break;
        case 3:
            cmd->dw32.mult_pred_l0_disable = 1;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
            break;

        }

    }else
    {
        cmd->dw32.mult_pred_l0_disable = 128;
        cmd->dw32.mult_pred_l1_disable = 128;
    }

    /*field setting for dw33 34, ignored*/

    if(avc_state->adaptive_transform_decision_enable)
    {
        if(generic_state->frame_type != SLICE_TYPE_I)
        {
            cmd->dw34.enable_adaptive_tx_decision = 1;
        }

        /* thresholds for the kernel's 4x4-vs-8x8 transform decision */
        cmd->dw58.mb_texture_threshold = 1024;
        cmd->dw58.tx_decision_threshold = 128;
    }


    if(generic_state->frame_type == SLICE_TYPE_B)
    {
        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
        /* NOTE(review): same field assigned twice; the second write was
         * presumably meant for list1_ref_id1_frm_field_parity — confirm
         * against the MbEnc CURBE layout before changing. */
        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
    }
    cmd->dw34.b_original_bff = 0; //frame only
    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;

    if(cmd->dw34.force_non_skip_check)
    {
       cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
    }

    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
    cmd->dw38.ref_threshold = 400;
    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B)?gen9_avc_hme_b_combine_len[preset]:gen9_avc_hme_combine_len[preset];

    /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
       0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
       starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled)?0:2;

    /* Distortion pass overrides: zero QPs and intra non-DC penalties */
    if(mbenc_i_frame_dist_in_use)
    {
        cmd->dw13.qp_prime_y = 0;
        cmd->dw13.qp_prime_cb = 0;
        cmd->dw13.qp_prime_cr = 0;
        cmd->dw33.intra_16x16_nondc_penalty = 0;
        cmd->dw33.intra_8x8_nondc_penalty = 0;
        cmd->dw33.intra_4x4_nondc_penalty = 0;

    }
    /* Per-reference QPs (multi-ref quality): 8 list0 entries, 2 list1 */
    if(cmd->dw4.use_actual_ref_qp_value)
    {
        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,0);
        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,1);
        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,2);
        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,3);
        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,4);
        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,5);
        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,6);
        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,7);
        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,0);
        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,1);
    }

    table_idx = slice_type_kernel[generic_state->frame_type];
    cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];

    /* Slice-type specific search/skip/bias programming */
    if(generic_state->frame_type == SLICE_TYPE_I)
    {
        cmd->dw0.skip_mode_enable = 0;
        cmd->dw37.skip_mode_enable = 0;
        cmd->dw36.hme_combine_overlap = 0;
        cmd->dw47.intra_cost_sf = 16;
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = 0;

    }else if(generic_state->frame_type == SLICE_TYPE_P)
    {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
        cmd->dw3.bme_disable_fbr = 1;
        cmd->dw5.ref_width = gen9_avc_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_search_y[preset];
        cmd->dw7.non_skip_zmv_added = 1;
        cmd->dw7.non_skip_mode_added = 1;
        cmd->dw7.skip_center_mask = 1;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
        cmd->dw36.hme_combine_overlap = 1;
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
        cmd->dw39.ref_width = gen9_avc_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_search_y[preset];
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if(avc_state->global_motion_bias_adjustment_enable)
            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;

    }else
    {
        /* SLICE_TYPE_B */
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
        cmd->dw1.bi_weight = avc_state->bi_weight;
        cmd->dw3.search_ctrl = 7;
        cmd->dw3.skip_type = 1;
        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw7.skip_center_mask = 0xff;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
        cmd->dw36.hme_combine_overlap = 1;
        /* the first backward reference must be valid for a B frame */
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
        {
            WARN_ONCE("Invalid backward reference frame\n");
            return;
        }
        cmd->dw36.is_fwd_frame_short_term_ref = !!( slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);

        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l1_active_minus1:0;
        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
        /* temporal distance scale factors for B direct-mode prediction */
        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];

        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
        if(cmd->dw34.enable_direct_bias_adjustment)
        {
            cmd->dw7.non_skip_zmv_added = 1;
            cmd->dw7.non_skip_mode_added = 1;
        }

        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if(avc_state->global_motion_bias_adjustment_enable)
            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;

    }

    /* remember the template's block-based-skip setting for later passes */
    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;

    if(avc_state->rolling_intra_refresh_enable)
    {
        /*by now disable it*/
        cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;

    }else
    {
        cmd->dw34.widi_intra_refresh_en = 0;
    }

    cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
    cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;

    /* ROI rectangles and per-region QP deltas (DW49..DW57); with BRC on,
     * the kernel-side ROI is disabled and BRC handles the deltas instead. */
    if(curbe_param->roi_enabled)
    {
        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;

        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;

        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;

        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;

        if(!generic_state->brc_enabled)
        {
            char tmp = 0;
            /* clamp each delta so qp + delta stays within [0, 52) */
            tmp = generic_state->roi[0].value;
            CLIP(tmp,-qp,52-qp);
            cmd->dw57.roi_1_dqp_prime_y = tmp;
            tmp = generic_state->roi[1].value;
            CLIP(tmp,-qp,52-qp);
            cmd->dw57.roi_2_dqp_prime_y = tmp;
            tmp = generic_state->roi[2].value;
            CLIP(tmp,-qp,52-qp);
            cmd->dw57.roi_3_dqp_prime_y = tmp;
            tmp = generic_state->roi[3].value;
            CLIP(tmp,-qp,52-qp);
            cmd->dw57.roi_4_dqp_prime_y = tmp;
        }else
        {
            cmd->dw34.roi_enable_flag = 0;
        }
    }

    /* Binding-table indices (DW64..DW85) must match the surfaces sent in
     * gen9_avc_send_surface_mbenc(). */
    cmd->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
    cmd->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
    cmd->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
    cmd->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
    cmd->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
    cmd->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
    cmd->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
    cmd->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
    cmd->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
    cmd->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
    cmd->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
    cmd->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
    cmd->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
    cmd->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
    cmd->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
    cmd->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
    cmd->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
    cmd->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
    cmd->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
    cmd->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
    cmd->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
    cmd->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
3407
3408 static void
3409 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
3410                             struct encode_state *encode_state,
3411                             struct i965_gpe_context *gpe_context,
3412                             struct intel_encoder_context *encoder_context,
3413                             void * param_mbenc)
3414 {
3415     struct i965_driver_data *i965 = i965_driver_data(ctx);
3416     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3417     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
3418     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
3419     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
3420     struct object_surface *obj_surface;
3421     struct gen9_surface_avc *avc_priv_surface;
3422     struct i965_gpe_resource *gpe_resource;
3423     struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
3424      VASurfaceID surface_id;
3425     unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
3426     unsigned int size = 0;
3427     unsigned int w_mb = generic_state->frame_width_in_mbs;
3428     unsigned int h_mb = generic_state->frame_height_in_mbs;
3429     int i = 0;
3430     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3431
3432     obj_surface = encode_state->reconstructed_object;
3433
3434     if (!obj_surface || !obj_surface->private_data)
3435         return;
3436     avc_priv_surface = obj_surface->private_data;
3437
3438     /*pak obj command buffer output*/
3439     size = w_mb * h_mb * 16 * 4;
3440     gpe_resource = &avc_priv_surface->res_mb_code_surface;
3441     gen9_add_buffer_gpe_surface(ctx,
3442                                 gpe_context,
3443                                 gpe_resource,
3444                                 0,
3445                                 size / 4,
3446                                 0,
3447                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
3448
3449     /*mv data buffer output*/
3450     size = w_mb * h_mb * 32 * 4;
3451     gpe_resource = &avc_priv_surface->res_mv_data_surface;
3452     gen9_add_buffer_gpe_surface(ctx,
3453                                 gpe_context,
3454                                 gpe_resource,
3455                                 0,
3456                                 size / 4,
3457                                 0,
3458                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
3459
3460     /*input current  YUV surface, current input Y/UV object*/
3461     if(mbenc_i_frame_dist_in_use)
3462     {
3463         obj_surface = encode_state->reconstructed_object;
3464         if (!obj_surface || !obj_surface->private_data)
3465             return;
3466         avc_priv_surface = obj_surface->private_data;
3467         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3468     }else
3469     {
3470         obj_surface = encode_state->input_yuv_object;
3471     }
3472     gen9_add_2d_gpe_surface(ctx,
3473                             gpe_context,
3474                             obj_surface,
3475                             0,
3476                             1,
3477                             I965_SURFACEFORMAT_R8_UNORM,
3478                             GEN9_AVC_MBENC_CURR_Y_INDEX);
3479
3480     gen9_add_2d_gpe_surface(ctx,
3481                             gpe_context,
3482                             obj_surface,
3483                             1,
3484                             1,
3485                             I965_SURFACEFORMAT_R16_UINT,
3486                             GEN9_AVC_MBENC_CURR_UV_INDEX);
3487
3488     if(generic_state->hme_enabled)
3489     {
3490         /*memv input 4x*/
3491         gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
3492         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3493                                        gpe_resource,
3494                                        1,
3495                                        I965_SURFACEFORMAT_R8_UNORM,
3496                                        GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
3497         /* memv distortion input*/
3498         gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
3499         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3500                                        gpe_resource,
3501                                        1,
3502                                        I965_SURFACEFORMAT_R8_UNORM,
3503                                        GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
3504     }
3505
3506     /*mbbrc const data_buffer*/
3507     if(param->mb_const_data_buffer_in_use)
3508     {
3509         size = 16 * 52 * sizeof(unsigned int);
3510         gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
3511         gen9_add_buffer_gpe_surface(ctx,
3512                                     gpe_context,
3513                                     gpe_resource,
3514                                     0,
3515                                     size / 4,
3516                                     0,
3517                                     GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
3518
3519     }
3520
3521     /*mb qp data_buffer*/
3522     if(param->mb_qp_buffer_in_use)
3523     {
3524         if(avc_state->mb_qp_data_enable)
3525             gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
3526         else
3527             gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
3528         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3529                                        gpe_resource,
3530                                        1,
3531                                        I965_SURFACEFORMAT_R8_UNORM,
3532                                        GEN9_AVC_MBENC_MBQP_INDEX);
3533     }
3534
3535     /*input current  YUV surface, current input Y/UV object*/
3536     if(mbenc_i_frame_dist_in_use)
3537     {
3538         obj_surface = encode_state->reconstructed_object;
3539         if (!obj_surface || !obj_surface->private_data)
3540             return;
3541         avc_priv_surface = obj_surface->private_data;
3542         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3543     }else
3544     {
3545         obj_surface = encode_state->input_yuv_object;
3546     }
3547     gen9_add_adv_gpe_surface(ctx, gpe_context,
3548                              obj_surface,
3549                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
3550     /*input ref YUV surface*/
3551     for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
3552     {
3553         surface_id = slice_param->RefPicList0[i].picture_id;
3554         obj_surface = SURFACE(surface_id);
3555         if (!obj_surface || !obj_surface->private_data)
3556             break;
3557
3558         gen9_add_adv_gpe_surface(ctx, gpe_context,
3559                                  obj_surface,
3560                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 1);
3561     }
3562     /*input current  YUV surface, current input Y/UV object*/
3563     if(mbenc_i_frame_dist_in_use)
3564     {
3565         obj_surface = encode_state->reconstructed_object;
3566         if (!obj_surface || !obj_surface->private_data)
3567             return;
3568         avc_priv_surface = obj_surface->private_data;
3569         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3570     }else
3571     {
3572         obj_surface = encode_state->input_yuv_object;
3573     }
3574     gen9_add_adv_gpe_surface(ctx, gpe_context,
3575                              obj_surface,
3576                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
3577
3578     for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
3579     {
3580         if(i > 0) break;// only  one ref supported here for B frame
3581         surface_id = slice_param->RefPicList1[i].picture_id;
3582         obj_surface = SURFACE(surface_id);
3583         if (!obj_surface || !obj_surface->private_data)
3584             break;
3585
3586         gen9_add_adv_gpe_surface(ctx, gpe_context,
3587                                  obj_surface,
3588                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1);
3589         gen9_add_adv_gpe_surface(ctx, gpe_context,
3590                                  obj_surface,
3591                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 2);
3592         if(i == 0)
3593         {
3594             avc_priv_surface = obj_surface->private_data;
3595             /*pak obj command buffer output(mb code)*/
3596             size = w_mb * h_mb * 16 * 4;
3597             gpe_resource = &avc_priv_surface->res_mb_code_surface;
3598             gen9_add_buffer_gpe_surface(ctx,
3599                                         gpe_context,
3600                                         gpe_resource,
3601                                         0,
3602                                         size / 4,
3603                                         0,
3604                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
3605
3606             /*mv data buffer output*/
3607             size = w_mb * h_mb * 32 * 4;
3608             gpe_resource = &avc_priv_surface->res_mv_data_surface;
3609             gen9_add_buffer_gpe_surface(ctx,
3610                                         gpe_context,
3611                                         gpe_resource,
3612                                         0,
3613                                         size / 4,
3614                                         0,
3615                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
3616
3617         }
3618
3619         if( i < INTEL_AVC_MAX_BWD_REF_NUM)
3620         {
3621             gen9_add_adv_gpe_surface(ctx, gpe_context,
3622                                      obj_surface,
3623                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
3624         }
3625
3626     }
3627
3628     /* BRC distortion data buffer for I frame*/
3629     if(mbenc_i_frame_dist_in_use)
3630     {
3631         gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
3632         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3633                                        gpe_resource,
3634                                        1,
3635                                        I965_SURFACEFORMAT_R8_UNORM,
3636                                        GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
3637     }
3638
3639     /* as ref frame ,update later RefPicSelect of Current Picture*/
3640     obj_surface = encode_state->reconstructed_object;
3641     avc_priv_surface = obj_surface->private_data;
3642     if(avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref)
3643     {
3644         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
3645         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3646                                        gpe_resource,
3647                                        1,
3648                                        I965_SURFACEFORMAT_R8_UNORM,
3649                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
3650
3651     }
3652
3653     if(param->mb_vproc_stats_enable)
3654     {
3655         /*mb status buffer input*/
3656         size = w_mb * h_mb * 16 * 4;
3657         gpe_resource = &(avc_ctx->res_mb_status_buffer);
3658         gen9_add_buffer_gpe_surface(ctx,
3659                                     gpe_context,
3660                                     gpe_resource,
3661                                     0,
3662                                     size / 4,
3663                                     0,
3664                                     GEN9_AVC_MBENC_MB_STATS_INDEX);
3665
3666     }else if(avc_state->flatness_check_enable)
3667     {
3668
3669         gpe_resource = &(avc_ctx->res_flatness_check_surface);
3670         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3671                                        gpe_resource,
3672                                        1,
3673                                        I965_SURFACEFORMAT_R8_UNORM,
3674                                        GEN9_AVC_MBENC_MB_STATS_INDEX);
3675     }
3676
3677     if(param->mad_enable)
3678     {
3679         /*mad buffer input*/
3680         size = 4;
3681         gpe_resource = &(avc_ctx->res_mad_data_buffer);
3682         gen9_add_buffer_gpe_surface(ctx,
3683                                     gpe_context,
3684                                     gpe_resource,
3685                                     0,
3686                                     size / 4,
3687                                     0,
3688                                     GEN9_AVC_MBENC_MAD_DATA_INDEX);
3689         i965_zero_gpe_resource(gpe_resource);
3690     }
3691
3692     /*brc updated mbenc curbe data buffer,it is ignored*/
3693
    /* arbitrary number of MBs per slice */
3695     if(avc_state->arbitrary_num_mbs_in_slice)
3696     {
3697         /*slice surface input*/
3698         gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
3699         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3700                                        gpe_resource,
3701                                        1,
3702                                        I965_SURFACEFORMAT_R8_UNORM,
3703                                        GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
3704     }
3705
3706     /* BRC distortion data buffer for I frame */
3707     if(!mbenc_i_frame_dist_in_use)
3708     {
3709         if(avc_state->mb_disable_skip_map_enable)
3710         {
3711             gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
3712             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3713                                            gpe_resource,
3714                                            1,
3715                                            I965_SURFACEFORMAT_R8_UNORM,
3716                                            GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX);
3717         }
3718
3719         if(avc_state->sfd_enable && generic_state->hme_enabled)
3720         {
3721             if(generic_state->frame_type == SLICE_TYPE_P)
3722             {
3723                 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
3724
3725             }else if(generic_state->frame_type == SLICE_TYPE_B)
3726             {
3727                 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
3728             }
3729
3730             if(generic_state->frame_type != SLICE_TYPE_I)
3731             {
3732                 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3733                                                gpe_resource,
3734                                                1,
3735                                                I965_SURFACEFORMAT_R8_UNORM,
3736                                                GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX);
3737             }
3738         }
3739     }
3740
3741     return;
3742 }
3743
/*
 * Dispatch the MBENC (per-macroblock encoding) GPGPU kernel for the current
 * frame, or the BRC I-frame distortion variant when i_frame_dist_in_use is
 * set.  Selects the kernel by quality preset and slice type, programs the
 * curbe and binding table, then launches a media-object walker sized to the
 * frame (or 4x-downscaled frame for the distortion pass).
 *
 * Always returns VA_STATUS_SUCCESS; GPU submission errors are not reported
 * from this level.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0)?1:0;
    /* NOTE(review): the trailing "&& (0)" deliberately disables dirty-ROI
     * for P frames; confirm whether this is still intended. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* The MB BRC constant-data surface is needed whenever any per-MB
     * QP/cost adjustment source is active. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if(mbenc_i_frame_dist_in_use)
    {
        /* BRC I-frame distortion pass: runs on the 4x-downscaled picture,
         * with MAD and BRC bookkeeping disabled. */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    }else
    {
        /* Normal MBENC pass: pick the I-slice kernel for the configured
         * quality/performance mode, then offset by slice type below. */
        switch(generic_state->kernel_mode)
        {
        case INTEL_ENC_KERNEL_NORMAL :
            {
                media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
                break;
            }
        case INTEL_ENC_KERNEL_PERFORMANCE :
            {
                media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
                break;
            }
        case INTEL_ENC_KERNEL_QUALITY :
            {
                media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
                break;
            }
        default:
            /* NOTE(review): assert() is a no-op under NDEBUG, so an
             * unexpected kernel_mode would fall through here with
             * media_function/kernel_idx still 0 — confirm kernel_mode is
             * always one of the three values above. */
            assert(0);

        }

        /* Kernels are laid out I, P, B consecutively per mode. */
        if(generic_state->frame_type == SLICE_TYPE_P)
        {
           kernel_idx += 1;
        }
        else if(generic_state->frame_type == SLICE_TYPE_B)
        {
           kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param,0,sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if(avc_state->mb_status_supported)
    {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* When the BRC-update kernel has already programmed the MBENC curbe,
     * skip both context init and curbe setup here. */
    if(!avc_state->mbenc_curbe_set_in_brc_update)
    {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if(!avc_state->mbenc_curbe_set_in_brc_update)
    {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx,encode_state,gpe_context,encoder_context,&param);
    }

    /* MB brc const data buffer set up*/
    if(mb_const_data_buffer_in_use)
    {
        gen9_avc_load_mb_brc_const_data(ctx,encode_state,encoder_context);
    }

    /*clear the mad buffer*/
    if(mad_enable)
    {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx,encode_state,gpe_context,encoder_context,&param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if(mbenc_i_frame_dist_in_use)
    {
        /* the distortion pass has no inter-MB dependency */
        kernel_walker_param.no_dependency = 1;
    }else
    {
        /* walker pattern depends on which neighbor MBs each MB needs:
         * 45 degrees for intra/temporal-direct B, 26 degrees otherwise */
        switch(generic_state->frame_type)
        {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if(!slice_param->direct_spatial_mv_pred_flag)
            {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                        gpe_context,
                                        media_function,
                                        &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
3923
/*
 * ME (motion estimation) kernel related functions
 */
3927 static void
3928 gen9_avc_set_curbe_me(VADriverContextP ctx,
3929                       struct encode_state *encode_state,
3930                       struct i965_gpe_context *gpe_context,
3931                       struct intel_encoder_context *encoder_context,
3932                       void * param)
3933 {
3934     gen9_avc_me_curbe_data *curbe_cmd;
3935     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3936     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
3937     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
3938
3939     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3940
3941     struct me_param * curbe_param = (struct me_param *)param ;
3942     unsigned char  use_mv_from_prev_step = 0;
3943     unsigned char write_distortions = 0;
3944     unsigned char qp_prime_y = 0;
3945     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
3946     unsigned char seach_table_idx = 0;
3947     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
3948     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
3949     unsigned int scale_factor = 0;
3950
3951     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
3952     switch(curbe_param->hme_type)
3953     {
3954     case INTEL_ENC_HME_4x :
3955         {
3956             use_mv_from_prev_step = (generic_state->b16xme_enabled)? 1:0;
3957             write_distortions = 1;
3958             mv_shift_factor = 2;
3959             scale_factor = 4;
3960             prev_mv_read_pos_factor = 0;
3961             break;
3962         }
3963     case INTEL_ENC_HME_16x :
3964         {
3965             use_mv_from_prev_step = (generic_state->b32xme_enabled)? 1:0;
3966             write_distortions = 0;
3967             mv_shift_factor = 2;
3968             scale_factor = 16;
3969             prev_mv_read_pos_factor = 1;
3970             break;
3971         }
3972     case INTEL_ENC_HME_32x :
3973         {
3974             use_mv_from_prev_step = 0;
3975             write_distortions = 0;
3976             mv_shift_factor = 1;
3977             scale_factor = 32;
3978             prev_mv_read_pos_factor = 0;
3979             break;
3980         }
3981     default:
3982         assert(0);
3983
3984     }
3985     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
3986
3987     if (!curbe_cmd)
3988         return;
3989
3990     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16;
3991     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16;
3992
3993     memcpy(curbe_cmd,gen9_avc_me_curbe_init_data,sizeof(gen9_avc_me_curbe_data));
3994
3995     curbe_cmd->dw3.sub_pel_mode = 3;
3996     if(avc_state->field_scaling_output_interleaved)
3997     {
3998         /*frame set to zero,field specified*/
3999         curbe_cmd->dw3.src_access = 0;
4000         curbe_cmd->dw3.ref_access = 0;
4001         curbe_cmd->dw7.src_field_polarity = 0;
4002     }
4003     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4004     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4005     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4006
4007     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4008     curbe_cmd->dw6.write_distortions = write_distortions;
4009     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4010     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4011
4012     if(generic_state->frame_type == SLICE_TYPE_B)
4013     {
4014         curbe_cmd->dw1.bi_weight = 32;
4015         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4016         me_method = gen9_avc_b_me_method[generic_state->preset];
4017         seach_table_idx = 1;
4018     }
4019
4020     if(generic_state->frame_type == SLICE_TYPE_P ||
4021        generic_state->frame_type == SLICE_TYPE_B )
4022        curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4023
4024     curbe_cmd->dw13.ref_streamin_cost = 5;
4025     curbe_cmd->dw13.roi_enable = 0;
4026
4027     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4028     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4029
4030     memcpy(&curbe_cmd->dw16,table_enc_search_path[seach_table_idx][me_method],14*sizeof(int));
4031
4032     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4033     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x)? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX:GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4034     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4035     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4036     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4037     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4038     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4039
4040     i965_gpe_context_unmap_curbe(gpe_context);
4041     return;
4042 }
4043
4044 static void
4045 gen9_avc_send_surface_me(VADriverContextP ctx,
4046                          struct encode_state *encode_state,
4047                          struct i965_gpe_context *gpe_context,
4048                          struct intel_encoder_context *encoder_context,
4049                          void * param)
4050 {
4051     struct i965_driver_data *i965 = i965_driver_data(ctx);
4052
4053     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4054     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4055     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4056     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4057
4058     struct object_surface *obj_surface, *input_surface;
4059     struct gen9_surface_avc *avc_priv_surface;
4060     struct i965_gpe_resource *gpe_resource;
4061     struct me_param * curbe_param = (struct me_param *)param ;
4062
4063     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4064     VASurfaceID surface_id;
4065     int i = 0;
4066
4067     /* all scaled input surface stored in reconstructed_object*/
4068     obj_surface = encode_state->reconstructed_object;
4069     if (!obj_surface || !obj_surface->private_data)
4070         return;
4071     avc_priv_surface = obj_surface->private_data;
4072
4073
4074     switch(curbe_param->hme_type)
4075     {
4076     case INTEL_ENC_HME_4x :
4077         {
4078             /*memv output 4x*/
4079             gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4080             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4081                                            gpe_resource,
4082                                            1,
4083                                            I965_SURFACEFORMAT_R8_UNORM,
4084                                            GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4085
4086             /*memv input 16x*/
4087             if(generic_state->b16xme_enabled)
4088             {
4089                 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4090                 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4091                                                gpe_resource,
4092                                                1,
4093                                                I965_SURFACEFORMAT_R8_UNORM,
4094                                                GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
4095             }
4096             /* brc distortion  output*/
4097             gpe_resource = &avc_ctx->res_brc_dist_data_surface;
4098             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4099                                            gpe_resource,
4100                                            1,
4101                                            I965_SURFACEFORMAT_R8_UNORM,
4102                                            GEN9_AVC_ME_BRC_DISTORTION_INDEX);
4103            /* memv distortion output*/
4104             gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4105             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4106                                            gpe_resource,
4107                                            1,
4108                                            I965_SURFACEFORMAT_R8_UNORM,
4109                                            GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
4110             /*input current down scaled YUV surface*/
4111             obj_surface = encode_state->reconstructed_object;
4112             avc_priv_surface = obj_surface->private_data;
4113             input_surface = avc_priv_surface->scaled_4x_surface_obj;
4114             gen9_add_adv_gpe_surface(ctx, gpe_context,
4115                                      input_surface,
4116                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4117             /*input ref scaled YUV surface*/
4118             for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
4119             {
4120                 surface_id = slice_param->RefPicList0[i].picture_id;
4121                 obj_surface = SURFACE(surface_id);
4122                 if (!obj_surface || !obj_surface->private_data)
4123                     break;
4124                 avc_priv_surface = obj_surface->private_data;
4125
4126                 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4127
4128                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4129                                          input_surface,
4130                                          GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
4131             }
4132
4133             obj_surface = encode_state->reconstructed_object;
4134             avc_priv_surface = obj_surface->private_data;
4135             input_surface = avc_priv_surface->scaled_4x_surface_obj;
4136
4137             gen9_add_adv_gpe_surface(ctx, gpe_context,
4138                                      input_surface,
4139                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4140
4141             for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
4142             {
4143                 surface_id = slice_param->RefPicList1[i].picture_id;
4144                 obj_surface = SURFACE(surface_id);
4145                 if (!obj_surface || !obj_surface->private_data)
4146                     break;
4147                 avc_priv_surface = obj_surface->private_data;
4148
4149                 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4150
4151                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4152                                          input_surface,
4153                                          GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
4154             }
4155             break;
4156
4157         }
4158     case INTEL_ENC_HME_16x :
4159         {
4160             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4161             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4162                                            gpe_resource,
4163                                            1,
4164                                            I965_SURFACEFORMAT_R8_UNORM,
4165                                            GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4166
4167             if(generic_state->b32xme_enabled)
4168             {
4169                 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4170                 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4171                                                gpe_resource,
4172                                                1,
4173                                                I965_SURFACEFORMAT_R8_UNORM,
4174                                                GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
4175             }
4176
4177             obj_surface = encode_state->reconstructed_object;
4178             avc_priv_surface = obj_surface->private_data;
4179             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4180             gen9_add_adv_gpe_surface(ctx, gpe_context,
4181                                      input_surface,
4182                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4183
4184             for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
4185             {
4186                 surface_id = slice_param->RefPicList0[i].picture_id;
4187                 obj_surface = SURFACE(surface_id);
4188                 if (!obj_surface || !obj_surface->private_data)
4189                     break;
4190                 avc_priv_surface = obj_surface->private_data;
4191
4192                 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4193
4194                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4195                                          input_surface,
4196                                          GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
4197             }
4198
4199             obj_surface = encode_state->reconstructed_object;
4200             avc_priv_surface = obj_surface->private_data;
4201             input_surface = avc_priv_surface->scaled_16x_surface_obj;
4202
4203             gen9_add_adv_gpe_surface(ctx, gpe_context,
4204                                      input_surface,
4205                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4206
4207             for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
4208             {
4209                 surface_id = slice_param->RefPicList1[i].picture_id;
4210                 obj_surface = SURFACE(surface_id);
4211                 if (!obj_surface || !obj_surface->private_data)
4212                     break;
4213                 avc_priv_surface = obj_surface->private_data;
4214
4215                 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4216
4217                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4218                                          input_surface,
4219                                          GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
4220             }
4221             break;
4222         }
4223     case INTEL_ENC_HME_32x :
4224         {
4225             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4226             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4227                                            gpe_resource,
4228                                            1,
4229                                            I965_SURFACEFORMAT_R8_UNORM,
4230                                            GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4231
4232             obj_surface = encode_state->reconstructed_object;
4233             avc_priv_surface = obj_surface->private_data;
4234             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4235             gen9_add_adv_gpe_surface(ctx, gpe_context,
4236                                      input_surface,
4237                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4238
4239             for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
4240             {
4241                 surface_id = slice_param->RefPicList0[i].picture_id;
4242                 obj_surface = SURFACE(surface_id);
4243                 if (!obj_surface || !obj_surface->private_data)
4244                     break;
4245                 avc_priv_surface = obj_surface->private_data;
4246
4247                 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4248
4249                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4250                                          input_surface,
4251                                          GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX+i*2 + 1);
4252             }
4253
4254             obj_surface = encode_state->reconstructed_object;
4255             avc_priv_surface = obj_surface->private_data;
4256             input_surface = avc_priv_surface->scaled_32x_surface_obj;
4257
4258             gen9_add_adv_gpe_surface(ctx, gpe_context,
4259                                      input_surface,
4260                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4261
4262             for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
4263             {
4264                 surface_id = slice_param->RefPicList1[i].picture_id;
4265                 obj_surface = SURFACE(surface_id);
4266                 if (!obj_surface || !obj_surface->private_data)
4267                     break;
4268                 avc_priv_surface = obj_surface->private_data;
4269
4270                 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4271
4272                 gen9_add_adv_gpe_surface(ctx, gpe_context,
4273                                          input_surface,
4274                                          GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX+i*2 + 1);
4275             }
4276             break;
4277         }
4278     default:
4279         assert(0);
4280
4281     }
4282 }
4283
4284 static VAStatus
4285 gen9_avc_kernel_me(VADriverContextP ctx,
4286                    struct encode_state *encode_state,
4287                    struct intel_encoder_context *encoder_context,
4288                    int hme_type)
4289 {
4290     struct i965_driver_data *i965 = i965_driver_data(ctx);
4291     struct i965_gpe_table *gpe = &i965->gpe_table;
4292     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4293     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4294     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4295     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4296
4297     struct i965_gpe_context *gpe_context;
4298     struct gpe_media_object_walker_parameter media_object_walker_param;
4299     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4300     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4301     int media_function = 0;
4302     int kernel_idx = 0;
4303     struct me_param param ;
4304     unsigned int scale_factor = 0;
4305
4306     switch(hme_type)
4307     {
4308     case INTEL_ENC_HME_4x :
4309         {
4310             media_function = INTEL_MEDIA_STATE_4X_ME;
4311             scale_factor = 4;
4312             break;
4313         }
4314     case INTEL_ENC_HME_16x :
4315         {
4316             media_function = INTEL_MEDIA_STATE_16X_ME;
4317             scale_factor = 16;
4318             break;
4319         }
4320     case INTEL_ENC_HME_32x :
4321         {
4322             media_function = INTEL_MEDIA_STATE_32X_ME;
4323             scale_factor = 32;
4324             break;
4325         }
4326     default:
4327         assert(0);
4328
4329     }
4330
4331     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel/scale_factor,16)/16;
4332     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel/scale_factor,16)/16;
4333
4334     /* I frame should not come here.*/
4335     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P)? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4336     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4337
4338     gpe->context_init(ctx, gpe_context);
4339     gpe->reset_binding_table(ctx, gpe_context);
4340
4341     /*set curbe*/
4342     memset(&param,0,sizeof(param));
4343     param.hme_type = hme_type;
4344     generic_ctx->pfn_set_curbe_me(ctx,encode_state,gpe_context,encoder_context,&param);
4345
4346     /*send surface*/
4347     generic_ctx->pfn_send_me_surface(ctx,encode_state,gpe_context,encoder_context,&param);
4348
4349     gpe->setup_interface_data(ctx, gpe_context);
4350
4351     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4352     /* the scaling is based on 8x8 blk level */
4353     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4354     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4355     kernel_walker_param.no_dependency = 1;
4356
4357     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4358
4359     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4360                                         gpe_context,
4361                                         media_function,
4362                                         &media_object_walker_param);
4363
4364     return VA_STATUS_SUCCESS;
4365 }
4366
/*
 * Weighted prediction (WP) related functions.
 */
4370 static void
4371 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4372                      struct encode_state *encode_state,
4373                      struct i965_gpe_context *gpe_context,
4374                      struct intel_encoder_context *encoder_context,
4375                      void * param)
4376 {
4377     gen9_avc_wp_curbe_data *cmd;
4378     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4379     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4380     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4381     struct wp_param * curbe_param = (struct wp_param *)param;
4382
4383     cmd = i965_gpe_context_map_curbe(gpe_context);
4384
4385     if (!cmd)
4386         return;
4387     memset(cmd,0,sizeof(gen9_avc_wp_curbe_data));
4388     if(curbe_param->ref_list_idx)
4389     {
4390         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4391         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4392     }else
4393     {
4394         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4395         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4396     }
4397
4398     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4399     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4400
4401     i965_gpe_context_unmap_curbe(gpe_context);
4402
4403 }
4404
/*
 * Bind the surfaces for the weighted-prediction (WP) kernel and update the
 * per-list "weighted ref enable" flags in avc_state.
 *
 * The kernel input is the first reference picture of the list selected by
 * param->ref_list_idx (0 = RefPicList0, 1 = RefPicList1); the output is the
 * pre-allocated WP scaled surface for that same list.
 */
static void
gen9_avc_send_surface_wp(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    /* i965 is referenced by the SURFACE() lookup macro below */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    struct wp_param * curbe_param = (struct wp_param *)param;
    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;

    if(curbe_param->ref_list_idx)
    {
        /* List 1: WP is only usable when the reference surface exists
         * and has backing private data. */
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            avc_state->weighted_ref_l1_enable = 0;
        else
            avc_state->weighted_ref_l1_enable = 1;
    }else
    {
        /* List 0: same validity check as above. */
        surface_id = slice_param->RefPicList0[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            avc_state->weighted_ref_l0_enable = 0;
        else
            avc_state->weighted_ref_l0_enable = 1;
    }
    /* Fall back to the first reference object so the kernel always gets an
     * input surface. NOTE(review): reference_objects[0] is assumed to be
     * non-NULL here — confirm against the callers. */
    if(!obj_surface)
        obj_surface = encode_state->reference_objects[0];


    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);

    /* Output: the scaled (weighted) picture for the selected list. */
    obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
}
4451
4452
4453 static VAStatus
4454 gen9_avc_kernel_wp(VADriverContextP ctx,
4455                    struct encode_state *encode_state,
4456                    struct intel_encoder_context *encoder_context,
4457                    unsigned int list1_in_use)
4458 {
4459     struct i965_driver_data *i965 = i965_driver_data(ctx);
4460     struct i965_gpe_table *gpe = &i965->gpe_table;
4461     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4462     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4463     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4464     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4465
4466     struct i965_gpe_context *gpe_context;
4467     struct gpe_media_object_walker_parameter media_object_walker_param;
4468     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4469     int media_function = INTEL_MEDIA_STATE_ENC_WP;
4470     struct wp_param param;
4471
4472     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4473
4474     gpe->context_init(ctx, gpe_context);
4475     gpe->reset_binding_table(ctx, gpe_context);
4476
4477     memset(&param,0,sizeof(param));
4478     param.ref_list_idx = (list1_in_use == 1)? 1: 0;
4479     /*set curbe*/
4480     generic_ctx->pfn_set_curbe_wp(ctx,encode_state,gpe_context,encoder_context,&param);
4481
4482     /*send surface*/
4483     generic_ctx->pfn_send_wp_surface(ctx,encode_state,gpe_context,encoder_context,&param);
4484
4485     gpe->setup_interface_data(ctx, gpe_context);
4486
4487     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4488     /* the scaling is based on 8x8 blk level */
4489     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4490     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4491     kernel_walker_param.no_dependency = 1;
4492
4493     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4494
4495     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4496                                         gpe_context,
4497                                         media_function,
4498                                         &media_object_walker_param);
4499
4500     return VA_STATUS_SUCCESS;
4501 }
4502
4503
/*
 * Static frame detection (SFD) related functions.
 */
/*
 * Fill the CURBE for the static-frame-detection (SFD) kernel.
 *
 * Programs the detection mode flags, the motion/distortion thresholds,
 * the per-QP cost table for the current slice type and the binding-table
 * indices consumed by the kernel.
 */
static void
gen9_avc_set_curbe_sfd(VADriverContextP ctx,
                     struct encode_state *encode_state,
                     struct i965_gpe_context *gpe_context,
                     struct intel_encoder_context *encoder_context,
                     void * param)
{
    gen9_avc_sfd_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;
    memset(cmd,0,sizeof(gen9_avc_sfd_curbe_data));

    cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
    cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
    /* NOTE(review): 7 is a fixed stream-in mode value carried over from
     * the reference implementation; its meaning is not documented here. */
    cmd->dw0.stream_in_type = 7 ;
    cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
    cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
    cmd->dw0.vdenc_mode_disable = 1 ;

    cmd->dw1.hme_stream_in_ref_cost = 5 ;
    cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
    /* Effective slice QP = picture init QP + per-slice delta. */
    cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;

    cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
    cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;

    /* Detection thresholds: the "total" thresholds are expressed as a
     * percentage of the frame's MB count (hence the /100). */
    cmd->dw3.large_mv_threshold = 128 ;
    cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs)/100 ;
    cmd->dw5.zmv_threshold = 4 ;
    cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold)/100 ; // zero_mv_threshold = 60;
    cmd->dw7.min_dist_threshold = 10 ;

    /* 52-entry per-QP cost table selected by slice type; I slices keep
     * the zeroed table from the memset above. */
    if(generic_state->frame_type == SLICE_TYPE_P)
    {
        memcpy(cmd->cost_table,gen9_avc_sfd_cost_table_p_frame,52* sizeof(unsigned char));

    }else if(generic_state->frame_type == SLICE_TYPE_B)
    {
        memcpy(cmd->cost_table,gen9_avc_sfd_cost_table_b_frame,52* sizeof(unsigned char));
    }

    cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
    cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
    /* Binding-table indices consumed by the kernel. */
    cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
    cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
    cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
    cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
    cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;

    i965_gpe_context_unmap_curbe(gpe_context);

}
4566
4567 static void
4568 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4569                           struct encode_state *encode_state,
4570                           struct i965_gpe_context *gpe_context,
4571                           struct intel_encoder_context *encoder_context,
4572                           void * param)
4573 {
4574     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4575     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4576     struct i965_gpe_resource *gpe_resource;
4577     int size = 0;
4578
4579     /*HME mv data surface memv output 4x*/
4580     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4581     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4582                                    gpe_resource,
4583                                    1,
4584                                    I965_SURFACEFORMAT_R8_UNORM,
4585                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4586
4587     /* memv distortion */
4588     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4589     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4590                                    gpe_resource,
4591                                    1,
4592                                    I965_SURFACEFORMAT_R8_UNORM,
4593                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4594     /*buffer output*/
4595     size = 32 * 4 *4;
4596     gpe_resource = &avc_ctx->res_sfd_output_buffer;
4597     gen9_add_buffer_gpe_surface(ctx,
4598                                 gpe_context,
4599                                 gpe_resource,
4600                                 0,
4601                                 size / 4,
4602                                 0,
4603                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4604
4605 }
4606
4607 static VAStatus
4608 gen9_avc_kernel_sfd(VADriverContextP ctx,
4609                     struct encode_state *encode_state,
4610                     struct intel_encoder_context *encoder_context)
4611 {
4612     struct i965_driver_data *i965 = i965_driver_data(ctx);
4613     struct i965_gpe_table *gpe = &i965->gpe_table;
4614     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4615     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4616     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
4617
4618     struct i965_gpe_context *gpe_context;
4619     struct gpe_media_object_parameter media_object_param;
4620     struct gpe_media_object_inline_data media_object_inline_data;
4621     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4622     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4623
4624     gpe->context_init(ctx, gpe_context);
4625     gpe->reset_binding_table(ctx, gpe_context);
4626
4627     /*set curbe*/
4628     generic_ctx->pfn_set_curbe_sfd(ctx,encode_state,gpe_context,encoder_context,NULL);
4629
4630     /*send surface*/
4631     generic_ctx->pfn_send_sfd_surface(ctx,encode_state,gpe_context,encoder_context,NULL);
4632
4633     gpe->setup_interface_data(ctx, gpe_context);
4634
4635     memset(&media_object_param, 0, sizeof(media_object_param));
4636     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4637     media_object_param.pinline_data = &media_object_inline_data;
4638     media_object_param.inline_size = sizeof(media_object_inline_data);
4639
4640     gen9_avc_run_kernel_media_object(ctx, encoder_context,
4641                                      gpe_context,
4642                                      media_function,
4643                                      &media_object_param);
4644
4645     return VA_STATUS_SUCCESS;
4646 }
4647
/*
 * Kernel context related functions: initialization, destruction, etc.
 */
4651 static void
4652 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4653                              struct generic_encoder_context *generic_context,
4654                              struct gen_avc_scaling_context *kernel_context)
4655 {
4656     struct i965_driver_data *i965 = i965_driver_data(ctx);
4657     struct i965_gpe_table *gpe = &i965->gpe_table;
4658     struct i965_gpe_context *gpe_context = NULL;
4659     struct encoder_kernel_parameter kernel_param ;
4660     struct encoder_scoreboard_parameter scoreboard_param;
4661     struct i965_kernel common_kernel;
4662
4663     /* 4x scaling kernel*/
4664     kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4665     kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4666     kernel_param.sampler_size = 0;
4667
4668     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4669     scoreboard_param.mask = 0xFF;
4670     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4671     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4672     scoreboard_param.walkpat_flag = 0;
4673
4674     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4675     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4676     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4677
4678     memset(&common_kernel, 0, sizeof(common_kernel));
4679
4680     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4681                                          generic_context->enc_kernel_size,
4682                                          INTEL_GENERIC_ENC_SCALING4X,
4683                                          0,
4684                                          &common_kernel);
4685
4686     gpe->load_kernels(ctx,
4687                       gpe_context,
4688                       &common_kernel,
4689                       1);
4690
4691     /*2x scaling kernel*/
4692     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4693     kernel_param.inline_data_size = 0;
4694     kernel_param.sampler_size = 0;
4695
4696     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4697     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4698     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4699
4700     memset(&common_kernel, 0, sizeof(common_kernel));
4701
4702     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4703                                          generic_context->enc_kernel_size,
4704                                          INTEL_GENERIC_ENC_SCALING2X,
4705                                          0,
4706                                          &common_kernel);
4707
4708     gpe->load_kernels(ctx,
4709                       gpe_context,
4710                       &common_kernel,
4711                       1);
4712
4713 }
4714
4715 static void
4716 gen9_avc_kernel_init_me(VADriverContextP ctx,
4717                         struct generic_encoder_context *generic_context,
4718                         struct gen_avc_me_context *kernel_context)
4719 {
4720     struct i965_driver_data *i965 = i965_driver_data(ctx);
4721     struct i965_gpe_table *gpe = &i965->gpe_table;
4722     struct i965_gpe_context *gpe_context = NULL;
4723     struct encoder_kernel_parameter kernel_param ;
4724     struct encoder_scoreboard_parameter scoreboard_param;
4725     struct i965_kernel common_kernel;
4726     int i = 0;
4727
4728     kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4729     kernel_param.inline_data_size = 0;
4730     kernel_param.sampler_size = 0;
4731
4732     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4733     scoreboard_param.mask = 0xFF;
4734     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4735     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4736     scoreboard_param.walkpat_flag = 0;
4737
4738     for (i = 0; i < 2; i++) {
4739         gpe_context = &kernel_context->gpe_contexts[i];
4740         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4741         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4742
4743         memset(&common_kernel, 0, sizeof(common_kernel));
4744
4745         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4746                                              generic_context->enc_kernel_size,
4747                                              INTEL_GENERIC_ENC_ME,
4748                                              i,
4749                                              &common_kernel);
4750
4751         gpe->load_kernels(ctx,
4752                               gpe_context,
4753                               &common_kernel,
4754                               1);
4755     }
4756
4757 }
4758
4759 static void
4760 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4761                            struct generic_encoder_context *generic_context,
4762                            struct gen_avc_mbenc_context *kernel_context)
4763 {
4764     struct i965_driver_data *i965 = i965_driver_data(ctx);
4765     struct i965_gpe_table *gpe = &i965->gpe_table;
4766     struct i965_gpe_context *gpe_context = NULL;
4767     struct encoder_kernel_parameter kernel_param ;
4768     struct encoder_scoreboard_parameter scoreboard_param;
4769     struct i965_kernel common_kernel;
4770     int i = 0;
4771
4772     kernel_param.curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4773     kernel_param.inline_data_size = 0;
4774     kernel_param.sampler_size = 0;
4775
4776     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4777     scoreboard_param.mask = 0xFF;
4778     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4779     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4780     scoreboard_param.walkpat_flag = 0;
4781
4782     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4783         gpe_context = &kernel_context->gpe_contexts[i];
4784         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4785         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4786
4787         memset(&common_kernel, 0, sizeof(common_kernel));
4788
4789         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4790                                              generic_context->enc_kernel_size,
4791                                              INTEL_GENERIC_ENC_MBENC,
4792                                              i,
4793                                              &common_kernel);
4794
4795         gpe->load_kernels(ctx,
4796                           gpe_context,
4797                           &common_kernel,
4798                           1);
4799     }
4800
4801 }
4802
4803 static void
4804 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4805                          struct generic_encoder_context *generic_context,
4806                          struct gen_avc_brc_context *kernel_context)
4807 {
4808     struct i965_driver_data *i965 = i965_driver_data(ctx);
4809     struct i965_gpe_table *gpe = &i965->gpe_table;
4810     struct i965_gpe_context *gpe_context = NULL;
4811     struct encoder_kernel_parameter kernel_param ;
4812     struct encoder_scoreboard_parameter scoreboard_param;
4813     struct i965_kernel common_kernel;
4814     int i = 0;
4815
4816     static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4817         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4818         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4819         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4820         (sizeof(gen9_avc_mbenc_curbe_data)),
4821         0,
4822         (sizeof(gen9_avc_mb_brc_curbe_data))
4823     };
4824
4825     kernel_param.inline_data_size = 0;
4826     kernel_param.sampler_size = 0;
4827
4828     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4829     scoreboard_param.mask = 0xFF;
4830     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4831     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4832     scoreboard_param.walkpat_flag = 0;
4833
4834     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4835         kernel_param.curbe_size = brc_curbe_size[i];
4836         gpe_context = &kernel_context->gpe_contexts[i];
4837         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4838         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4839
4840         memset(&common_kernel, 0, sizeof(common_kernel));
4841
4842         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4843                                              generic_context->enc_kernel_size,
4844                                              INTEL_GENERIC_ENC_BRC,
4845                                              i,
4846                                              &common_kernel);
4847
4848         gpe->load_kernels(ctx,
4849                           gpe_context,
4850                           &common_kernel,
4851                           1);
4852     }
4853
4854 }
4855
4856 static void
4857 gen9_avc_kernel_init_wp(VADriverContextP ctx,
4858                         struct generic_encoder_context *generic_context,
4859                         struct gen_avc_wp_context *kernel_context)
4860 {
4861     struct i965_driver_data *i965 = i965_driver_data(ctx);
4862     struct i965_gpe_table *gpe = &i965->gpe_table;
4863     struct i965_gpe_context *gpe_context = NULL;
4864     struct encoder_kernel_parameter kernel_param ;
4865     struct encoder_scoreboard_parameter scoreboard_param;
4866     struct i965_kernel common_kernel;
4867
4868     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
4869     kernel_param.inline_data_size = 0;
4870     kernel_param.sampler_size = 0;
4871
4872     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4873     scoreboard_param.mask = 0xFF;
4874     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4875     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4876     scoreboard_param.walkpat_flag = 0;
4877
4878     gpe_context = &kernel_context->gpe_contexts;
4879     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4880     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4881
4882     memset(&common_kernel, 0, sizeof(common_kernel));
4883
4884     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4885                                          generic_context->enc_kernel_size,
4886                                          INTEL_GENERIC_ENC_WP,
4887                                          0,
4888                                          &common_kernel);
4889
4890     gpe->load_kernels(ctx,
4891                           gpe_context,
4892                           &common_kernel,
4893                           1);
4894
4895 }
4896
4897 static void
4898 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
4899                          struct generic_encoder_context *generic_context,
4900                          struct gen_avc_sfd_context *kernel_context)
4901 {
4902     struct i965_driver_data *i965 = i965_driver_data(ctx);
4903     struct i965_gpe_table *gpe = &i965->gpe_table;
4904     struct i965_gpe_context *gpe_context = NULL;
4905     struct encoder_kernel_parameter kernel_param ;
4906     struct encoder_scoreboard_parameter scoreboard_param;
4907     struct i965_kernel common_kernel;
4908
4909     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
4910     kernel_param.inline_data_size = 0;
4911     kernel_param.sampler_size = 0;
4912
4913     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4914     scoreboard_param.mask = 0xFF;
4915     scoreboard_param.enable = generic_context->use_hw_scoreboard;
4916     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4917     scoreboard_param.walkpat_flag = 0;
4918
4919     gpe_context = &kernel_context->gpe_contexts;
4920     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4921     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4922
4923     memset(&common_kernel, 0, sizeof(common_kernel));
4924
4925     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4926                                          generic_context->enc_kernel_size,
4927                                          INTEL_GENERIC_ENC_SFD,
4928                                          0,
4929                                          &common_kernel);
4930
4931     gpe->load_kernels(ctx,
4932                           gpe_context,
4933                           &common_kernel,
4934                           1);
4935
4936 }
4937
4938 static void
4939 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
4940 {
4941
4942     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
4943     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
4944     struct i965_gpe_table *gpe = &i965->gpe_table;
4945
4946     int i = 0;
4947
4948     gen9_avc_free_resources(vme_context);
4949
4950     for(i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
4951         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
4952
4953     for(i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
4954         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
4955
4956     for(i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
4957         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
4958
4959     for(i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
4960         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
4961
4962     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
4963
4964     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
4965
4966 }
4967
4968 /*
4969 vme pipeline
4970 */
4971 static void
4972 gen9_avc_update_parameters(VADriverContextP ctx,
4973                              VAProfile profile,
4974                              struct encode_state *encode_state,
4975                              struct intel_encoder_context *encoder_context)
4976 {
4977     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4978     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
4979     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
4980     VAEncSequenceParameterBufferH264 *seq_param;
4981     VAEncPictureParameterBufferH264 *pic_param ;
4982     VAEncSliceParameterBufferH264 * slice_param;
4983     int i,j;
4984     unsigned int preset = generic_state->preset;
4985
4986     /* seq/pic/slice parameter setting */
4987     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
4988     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
4989
4990     avc_state->seq_param =  (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
4991     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
4992
4993
4994     avc_state->enable_avc_ildb = 0;
4995     avc_state->slice_num = 0;
4996     for (j = 0; j < encode_state->num_slice_params_ext && avc_state->enable_avc_ildb == 0; j++) {
4997         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
4998         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
4999
5000         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5001             assert((slice_param->slice_type == SLICE_TYPE_I) ||
5002                    (slice_param->slice_type == SLICE_TYPE_SI) ||
5003                    (slice_param->slice_type == SLICE_TYPE_P) ||
5004                    (slice_param->slice_type == SLICE_TYPE_SP) ||
5005                    (slice_param->slice_type == SLICE_TYPE_B));
5006
5007             if (slice_param->disable_deblocking_filter_idc != 1) {
5008                 avc_state->enable_avc_ildb = 1;
5009             }
5010
5011             avc_state->slice_param[i] = slice_param;
5012             slice_param++;
5013             avc_state->slice_num++;
5014         }
5015     }
5016
5017     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5018     seq_param = avc_state->seq_param;
5019     pic_param = avc_state->pic_param;
5020     slice_param = avc_state->slice_param[0];
5021
5022     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5023
5024     if (slice_param->slice_type == SLICE_TYPE_I ||
5025         slice_param->slice_type == SLICE_TYPE_SI)
5026         generic_state->frame_type = SLICE_TYPE_I;
5027     else if(slice_param->slice_type == SLICE_TYPE_P)
5028         generic_state->frame_type = SLICE_TYPE_P;
5029     else if(slice_param->slice_type == SLICE_TYPE_B)
5030         generic_state->frame_type = SLICE_TYPE_B;
5031     if (profile == VAProfileH264High)
5032         avc_state->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
5033     else
5034         avc_state->transform_8x8_mode_enable = 0;
5035
5036     /* rc init*/
5037     if(generic_state->brc_enabled &&(!generic_state->brc_inited || generic_state->brc_need_reset ))
5038     {
5039         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5040         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5041         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5042         generic_state->frames_per_100s = 3000; /* 30fps */
5043     }
5044
5045     generic_state->gop_size = seq_param->intra_period;
5046     generic_state->gop_ref_distance = seq_param->ip_period;
5047
5048     if (generic_state->internal_rate_mode == VA_RC_CBR) {
5049         generic_state->max_bit_rate = generic_state->target_bit_rate;
5050         generic_state->min_bit_rate = generic_state->target_bit_rate;
5051     }
5052
5053     if(generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame)
5054     {
5055         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5056     }
5057
5058     generic_state->preset = encoder_context->quality_level;
5059     if(encoder_context->quality_level == INTEL_PRESET_UNKNOWN)
5060     {
5061         generic_state->preset = INTEL_PRESET_RT_SPEED;
5062     }
5063     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5064
5065     if(!generic_state->brc_inited)
5066     {
5067         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5068         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5069         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5070         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5071     }
5072
5073
5074     generic_state->curr_pak_pass = 0;
5075     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5076
5077     if (generic_state->internal_rate_mode == VA_RC_CBR ||
5078         generic_state->internal_rate_mode == VA_RC_VBR)
5079         generic_state->brc_enabled = 1;
5080     else
5081         generic_state->brc_enabled = 0;
5082
5083     if (generic_state->brc_enabled &&
5084         (!generic_state->init_vbv_buffer_fullness_in_bit ||
5085          !generic_state->vbv_buffer_size_in_bit ||
5086          !generic_state->max_bit_rate ||
5087          !generic_state->target_bit_rate ||
5088          !generic_state->frames_per_100s))
5089     {
5090         WARN_ONCE("Rate control parameter is required for BRC\n");
5091         generic_state->brc_enabled = 0;
5092     }
5093
5094     if (!generic_state->brc_enabled) {
5095         generic_state->target_bit_rate = 0;
5096         generic_state->max_bit_rate = 0;
5097         generic_state->min_bit_rate = 0;
5098         generic_state->init_vbv_buffer_fullness_in_bit = 0;
5099         generic_state->vbv_buffer_size_in_bit = 0;
5100         generic_state->num_pak_passes = 1;
5101     } else {
5102         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5103     }
5104
5105
5106     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5107     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5108     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5109     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5110
5111     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel/4,16);
5112     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel/4,16);
5113     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x/16 ;
5114     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x/16;
5115
5116     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel/16,16);
5117     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel/16,16);
5118     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x/16 ;
5119     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x/16;
5120
5121     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel/32,16);
5122     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel/32,16);
5123     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x/16 ;
5124     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x/16;
5125
5126     if (generic_state->hme_supported) {
5127         generic_state->hme_enabled = 1;
5128     } else {
5129         generic_state->hme_enabled = 0;
5130     }
5131
5132     if (generic_state->b16xme_supported) {
5133         generic_state->b16xme_enabled = 1;
5134     } else {
5135         generic_state->b16xme_enabled = 0;
5136     }
5137
5138     if (generic_state->b32xme_supported) {
5139         generic_state->b32xme_enabled = 1;
5140     } else {
5141         generic_state->b32xme_enabled = 0;
5142     }
5143     /* disable HME/16xME if the size is too small */
5144     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5145         generic_state->b32xme_supported = 0;
5146         generic_state->b32xme_enabled = 0;
5147         generic_state->b16xme_supported = 0;
5148         generic_state->b16xme_enabled = 0;
5149         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5150         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5151     }
5152     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5153         generic_state->b32xme_supported = 0;
5154         generic_state->b32xme_enabled = 0;
5155         generic_state->b16xme_supported = 0;
5156         generic_state->b16xme_enabled = 0;
5157         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5158         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5159     }
5160
5161     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT)
5162     {
5163         generic_state->b32xme_supported = 0;
5164         generic_state->b32xme_enabled = 0;
5165         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5166         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5167     }
5168     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5169         generic_state->b32xme_supported = 0;
5170         generic_state->b32xme_enabled = 0;
5171         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5172         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5173     }
5174
5175     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT)
5176     {
5177         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5178         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5179     }
5180     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5181         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5182         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5183     }
5184
5185 }
5186
/*
 * Reconcile and sanity-check the per-frame encoding state before any VME
 * kernel is programmed: map the VA rate-control mode onto the internal
 * mode, enable/disable BRC-related features, derive CAF/flatness-check/
 * transform-decision flags from the preset tables, validate the slice
 * layout and compute per-frame ME/bi-prediction settings.
 *
 * Always returns VA_STATUS_SUCCESS (there is currently no failure path).
 */
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param ;
    int i = 0;

    /*resolution change detection*/
    pic_param = avc_state->pic_param;

    /* AVBR defaults (accuracy/convergence); note the field name typo
     * ("curracy") is part of the project-declared struct. */
    /*avbr init*/
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Map the VA rate-control mode (low 7 bits) onto the internal mode;
     * anything other than CBR/VBR falls back to CQP. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    /* Any true rate-control mode turns BRC on together with its helper
     * surfaces and the multi-pass PAK loop. */
    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /*check brc parameter*/
    /* Per-MB QP input is mutually exclusive with BRC-driven QP. */
    if(generic_state->brc_enabled)
    {
       avc_state->mb_qp_data_enable = 0;
    }

    /*set the brc init and reset accordingly*/
    /* A BRC reset request is meaningless without the distortion buffer or
     * under CQP, so drop it. */
    if(generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
        rate_control_mode == VA_RC_CQP))
    {
       generic_state->brc_need_reset = 0;// not support by CQP
    }

    /* NOTE(review): this disables frame-level SFD on a BRC reset when the
     * per-MB SFD variant is unavailable — confirm intent against the BRC
     * reset kernel requirements. */
    if(generic_state->brc_need_reset && !avc_state->sfd_mb_enable)
    {
        avc_state->sfd_enable = 0;
    }

    /* Clamp the sliding-window size to min(framerate, 60) frames when it is
     * unset or larger than two seconds worth of frames. */
    if(generic_state->frames_per_window_size == 0)
    {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
    }else if(generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s/100)
    {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s/100 < 60)?(generic_state->frames_per_100s/100):60;
    }

    if(generic_state->brc_enabled)
    {
        /* HME is pointless on intra frames (also forced off again below). */
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        /* User-supplied min/max QP bounds rule out multi-pass PAK. */
        if(avc_state->min_max_qp_enable)
        {
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
        /* ROI-based BRC requires the MB-level BRC kernel. */
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    }else
    {
        generic_state->num_pak_passes = 1;// CQP only one pass
    }

    /* The I-frame distortion variant of MbEnc feeds the BRC distortion
     * surface; it is only used on intra frames with BRC active. */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /*CAD check*/
    /* Check-All-Fractional (CAF) motion estimation: enable per frame type
     * from the preset table, then disable for >=720p when the preset says
     * high-resolution CAF should be skipped. */
    if(avc_state->caf_supported)
    {
        switch(generic_state->frame_type)
        {
        case SLICE_TYPE_I:
            break;
        case SLICE_TYPE_P:
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        if(avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset])
        {
            if(generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                 avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset&0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if(avc_state->flatness_check_supported )
    {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    }else
    {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enbale*/
    /* The MB status buffer is only consumed by the adaptive transform
     * decision path. */
    if(avc_state->adaptive_transform_decision_enable)
    {
       avc_state->mb_status_enable = 1;
    }else
    {
       avc_state->mb_status_enable = 0;
    }
    /*slice check,all the slices use the same slice height except the last slice*/
    /* Every slice must cover whole MB rows; slice_height ends up holding
     * the height (in MBs) of the last slice examined. */
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for(i = 0; i < avc_state->slice_num;i++)
    {
        assert(avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs == 0);
        avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        /*add it later for muli slices map*/
    }

    /* Intra frames never run any level of HME. */
    if(generic_state->frame_type == SLICE_TYPE_I)
    {
       generic_state->hme_enabled = 0;
       generic_state->b16xme_enabled = 0;
       generic_state->b32xme_enabled = 0;
    }

    /* B frames need the POC-based distance scale factors and the derived
     * bi-prediction weight for list 0. */
    if(generic_state->frame_type == SLICE_TYPE_B)
    {
        gen9_avc_get_dist_scale_factor(ctx,encode_state,encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0],pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
        && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality presets turn on trellis quantization and (with BRC) MB-level
     * bitrate control. */
    if(generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY)
    {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if(generic_state->brc_enabled)
        {
            generic_state->mb_brc_enabled = 1;
        }
    }

    return VA_STATUS_SUCCESS;
}
5355
/*
 * Bind all per-frame GPE resources before the VME kernels run:
 *  - allocate/validate the reconstructed and input NV12 surfaces and wrap
 *    them as 2D GPE resources,
 *  - attach the current frame's direct-MV (DMV) buffers to the last two
 *    slots of res_direct_mv_buffersr and record its top-field POCs,
 *  - rebuild the reference-surface list plus each reference's DMV buffers
 *    and POCs, assigning frame_store_id by list position,
 *  - map the coded buffer for bitstream output and reset the status
 *    segment header,
 *  - resolve RefPicList0/1 picture ids to indices into
 *    encode_state->reference_objects (list_ref_idx).
 *
 * Returns VA_STATUS_SUCCESS, a surface-allocation error, or
 * VA_STATUS_ERROR_INVALID_VALUE when the active reference count exceeds
 * the internal list capacity.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i,j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Ensure the surface carries the encoder's private per-surface data
     * (DMV buffers etc.) sized for the current frame dimensions. */
    memset(&surface_param,0,sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
    /* init the member of avc_priv_surface,frame_store_id,qp_value*/
       /* The current frame's DMV buffers occupy the last two slots of
        * res_direct_mv_buffersr (top/bottom field); its POCs are tracked
        * in the matching top_field_poc slots. */
       avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = 0;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = 0;
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2]);
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
       /* Frame-level QP = pic_init_qp + first slice's delta. */
       avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
       avc_priv_surface->frame_store_id = 0;
       avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
       avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
       avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Slot i of the reference list maps to DMV slots 2*i (top) and
     * 2*i+1 (bottom); stale resources are freed first. The loop stops at
     * the first empty slot (references are packed from index 0). */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2*i] = 0;
        avc_state->top_field_poc[2*i+1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                obj_surface,encoder_context,
                &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        }else
        {
            break;
        }
    }

    /* Encoded bitstream ?*/
    /* The coded buffer holds the driver status header followed by the
     * bitstream; leave a 4KB guard at the end. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* Zero the status region so stale results are never reported. */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata,0,avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Active reference counts come from the picture parameters, optionally
     * overridden per slice (num_ref_idx_active_override_flag). */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Translate RefPicList0 entries (VA surface ids) into indices into
     * encode_state->reference_objects; unused entries stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same translation for RefPicList1 (B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
5553
5554 static VAStatus
5555 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5556                              struct encode_state *encode_state,
5557                              struct intel_encoder_context *encoder_context)
5558 {
5559     return VA_STATUS_SUCCESS;
5560 }
5561
5562 static VAStatus
5563 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5564                               struct encode_state *encode_state,
5565                               struct intel_encoder_context *encoder_context)
5566 {
5567
5568     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5569     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
5570     struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
5571
5572     /*set this flag when all kernel is finished*/
5573     if(generic_state->brc_enabled)
5574     {
5575         generic_state->brc_inited = 1;
5576         generic_state->brc_need_reset = 0;
5577         avc_state->mbenc_curbe_set_in_brc_update = 0;
5578     }
5579     return VA_STATUS_SUCCESS;
5580 }
5581
/*
 * Submit the VME kernels for one frame in the required order:
 * BRC init/reset -> downscaling -> HME (32x, then 16x, then 4x) ->
 * static frame detection -> BRC frame/MB update (with the optional
 * I-frame distortion MbEnc pass first) -> weighted prediction -> MbEnc.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if(generic_state->brc_enabled &&(!generic_state->brc_inited || generic_state->brc_need_reset ))
    {
        gen9_avc_kernel_brc_init_reset(ctx,encode_state,encoder_context);
    }

    /*down scaling*/
    /* Each scaling level feeds the next: 4x is required for 16x, and 16x
     * for 32x. */
    if(generic_state->hme_supported)
    {
        gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_4x);
        if(generic_state->b16xme_supported)
        {
            gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_16x);
            if(generic_state->b32xme_supported)
            {
                gen9_avc_kernel_scaling(ctx,encode_state,encoder_context,INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel*/
    /* ME runs coarsest-first (32x -> 16x -> 4x) so each level can refine
     * the predictors produced by the level above it. */
    if(generic_state->hme_enabled)
    {
        if(generic_state->b16xme_enabled)
        {
            if(generic_state->b32xme_enabled)
            {
                gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx,encode_state,encoder_context,INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    /* Frame-level static frame detection; skipped when the per-MB SFD
     * variant is in use. */
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if(sfd_in_use)
    {
        gen9_avc_kernel_sfd(ctx,encode_state,encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if(generic_state->brc_enabled)
    {
        /* Optional MbEnc pass in I-frame-distortion mode to populate the
         * BRC distortion surface before the frame update. */
        if(avc_state->mbenc_i_frame_dist_in_use)
        {
            gen9_avc_kernel_mbenc(ctx,encode_state,encoder_context,true);
        }
        gen9_avc_kernel_brc_frame_update(ctx,encode_state,encoder_context);

        if(generic_state->mb_brc_enabled)
        {
            gen9_avc_kernel_brc_mb_update(ctx,encode_state,encoder_context);
        }
    }

    /*weight prediction,disable by now */
    /* Explicit weighted prediction: run the WP kernel per list when luma
     * weights are present; otherwise fall back by clearing the pic_param
     * flags (workaround for inconsistent app-supplied parameters). */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if(avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
        (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT)))
    {
        if(slice_param->luma_weight_l0_flag & 1)
        {
            gen9_avc_kernel_wp(ctx,encode_state,encoder_context,0);

        }else if(!(slice_param->chroma_weight_l0_flag & 1))
        {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if(generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT)
        {
            if(slice_param->luma_weight_l1_flag & 1)
            {
                gen9_avc_kernel_wp(ctx,encode_state,encoder_context,1);
            }else if(!((slice_param->luma_weight_l0_flag & 1)||
                       (slice_param->chroma_weight_l0_flag & 1)||
                       (slice_param->chroma_weight_l1_flag & 1)))
            {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx,encode_state,encoder_context,false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
5689
5690 static VAStatus
5691 gen9_avc_vme_pipeline(VADriverContextP ctx,
5692                       VAProfile profile,
5693                       struct encode_state *encode_state,
5694                       struct intel_encoder_context *encoder_context)
5695 {
5696     VAStatus va_status;
5697
5698     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5699
5700     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5701     if (va_status != VA_STATUS_SUCCESS)
5702         return va_status;
5703
5704     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5705     if (va_status != VA_STATUS_SUCCESS)
5706         return va_status;
5707
5708     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5709     if (va_status != VA_STATUS_SUCCESS)
5710         return va_status;
5711
5712     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5713     if (va_status != VA_STATUS_SUCCESS)
5714         return va_status;
5715
5716     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5717     if (va_status != VA_STATUS_SUCCESS)
5718         return va_status;
5719
5720     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5721
5722     return VA_STATUS_SUCCESS;
5723 }
5724
5725 static void
5726 gen9_avc_vme_context_destroy(void * context)
5727 {
5728     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5729     struct generic_encoder_context *generic_ctx;
5730     struct i965_avc_encoder_context *avc_ctx;
5731     struct generic_enc_codec_state *generic_state;
5732     struct avc_enc_state *avc_state;
5733
5734     if (!vme_context)
5735         return;
5736
5737     generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
5738     avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
5739     generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
5740     avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
5741
5742     gen9_avc_kernel_destroy(vme_context);
5743
5744     free(generic_ctx);
5745     free(avc_ctx);
5746     free(generic_state);
5747     free(avc_state);
5748     free(vme_context);
5749     return;
5750
5751 }
5752
5753 static void
5754 gen9_avc_kernel_init(VADriverContextP ctx,
5755                      struct intel_encoder_context *encoder_context)
5756 {
5757     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5758     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
5759     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
5760
5761     gen9_avc_kernel_init_scaling(ctx,generic_ctx,&avc_ctx->context_scaling);
5762     gen9_avc_kernel_init_brc(ctx,generic_ctx,&avc_ctx->context_brc);
5763     gen9_avc_kernel_init_me(ctx,generic_ctx,&avc_ctx->context_me);
5764     gen9_avc_kernel_init_mbenc(ctx,generic_ctx,&avc_ctx->context_mbenc);
5765     gen9_avc_kernel_init_wp(ctx,generic_ctx,&avc_ctx->context_wp);
5766     gen9_avc_kernel_init_sfd(ctx,generic_ctx,&avc_ctx->context_sfd);
5767
5768     //function pointer
5769     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5770     generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5771     generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5772     generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5773     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5774     generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5775     generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5776     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5777     generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
5778
5779     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5780     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5781     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5782     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5783     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5784     generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5785     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5786     generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
5787
5788
5789
5790 /*
5791 PAK pipeline related function
5792 */
5793 extern int
5794 intel_avc_enc_slice_type_fixup(int slice_type);
5795
/* Program MFX_PIPE_MODE_SELECT for the AVC PAK (encoding) pass.
 * Stream-out (bit 10) is enabled on every PAK pass except the last one
 * — presumably so per-MB statistics feed the next multi-pass BRC
 * iteration; confirm against the BRC pass logic.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    /* DW1: codec/mode selection flags */
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes -1)) << 10) |                   /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    /* DW2: error handling / clock gating controls (all left at defaults) */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: unused here, programmed to zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
5836
/* Program MFX_SURFACE_STATE for one tiled, interleaved-UV (NV12-style)
 * 4:2:0 surface identified by 'id' (e.g. source vs reconstructed).
 * Dimensions and pitch are taken from the GPE resource descriptor.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    /* DW2: height/width are programmed as (value - 1) */
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                           /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* NOTE(review): DW5 is the Cr offset; same value as Cb is expected for interleaved U/V — confirm against the PRM */

    ADVANCE_BCS_BATCH(batch);
}
5868
/* Program MFX_PIPE_BUF_ADDR_STATE: hand the PAK engine every frame-level
 * buffer it touches — pre/post deblocking outputs, the raw source
 * surface, the MB status buffer, row-store scratch buffers and the
 * reference picture list. The DW layout is fixed by hardware; do not
 * reorder these emissions.
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 65);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    /* NOTE(review): the MB status buffer serves both as the PAK write
     * target here and the read source at DW52-54 — confirm intended. */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 DWs each) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
5926
/* Program MFX_IND_OBJ_BASE_ADDR_STATE: base addresses and upper bounds
 * for the indirect objects — the per-MB MV data surface attached to the
 * reconstructed picture, and the compressed bitstream output buffer.
 * Returns silently if the reconstructed surface has no private data.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* size assumes 32 DWs (128 bytes) of MV data per MB — TODO confirm */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    /* upper bound = base + page-aligned size */
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size,0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
5991
/* Program MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store
 * scratch buffer is needed for encoding; the MPR row-store and bitplane
 * read buffers are decode-only and left NULL.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
6015
/* Program MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for the
 * reference frames and the current frame, followed by the POC list used
 * for B-frame direct/temporal prediction.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* Each iteration emits one 64-bit address (2 DWs); only even-indexed
     * DMV buffers are referenced — presumably frame (not field) slots,
     * TODO confirm against NUM_MFC_AVC_DMV_BUFFERS layout. */
    for(i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if ( avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: attributes (MOCS) for the reference DMV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: 32 reference entries, then the current frame's two */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
6063
6064 static void
6065 gen9_mfc_qm_state(VADriverContextP ctx,
6066                   int qm_type,
6067                   const unsigned int *qm,
6068                   int qm_length,
6069                   struct intel_encoder_context *encoder_context)
6070 {
6071     struct intel_batchbuffer *batch = encoder_context->base.batch;
6072     unsigned int qm_buffer[16];
6073
6074     assert(qm_length <= 16);
6075     assert(sizeof(*qm) == 4);
6076     memset(qm_buffer,0,16*4);
6077     memcpy(qm_buffer, qm, qm_length * 4);
6078
6079     BEGIN_BCS_BATCH(batch, 18);
6080     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6081     OUT_BCS_BATCH(batch, qm_type << 0);
6082     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6083     ADVANCE_BCS_BATCH(batch);
6084 }
6085
6086 static void
6087 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6088                       struct encode_state *encode_state,
6089                       struct intel_encoder_context *encoder_context)
6090 {
6091     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6092     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
6093     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6094     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6095
6096
6097     const unsigned int *qm_4x4_intra;
6098     const unsigned int *qm_4x4_inter;
6099     const unsigned int *qm_8x8_intra;
6100     const unsigned int *qm_8x8_inter;
6101
6102     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6103         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6104         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6105     } else {
6106         VAIQMatrixBufferH264 *qm;
6107         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6108         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6109         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6110         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6111         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6112         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
6113     }
6114
6115     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6116     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6117     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6118     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
6119 }
6120
6121 static void
6122 gen9_mfc_fqm_state(VADriverContextP ctx,
6123                    int fqm_type,
6124                    const unsigned int *fqm,
6125                    int fqm_length,
6126                    struct intel_encoder_context *encoder_context)
6127 {
6128     struct intel_batchbuffer *batch = encoder_context->base.batch;
6129     unsigned int fqm_buffer[32];
6130
6131     assert(fqm_length <= 32);
6132     assert(sizeof(*fqm) == 4);
6133     memset(fqm_buffer,0,32*4);
6134     memcpy(fqm_buffer, fqm, fqm_length * 4);
6135
6136     BEGIN_BCS_BATCH(batch, 34);
6137     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6138     OUT_BCS_BATCH(batch, fqm_type << 0);
6139     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6140     ADVANCE_BCS_BATCH(batch);
6141 }
6142
/* Convert an 8-bit quantizer matrix into the 16-bit reciprocal ("FQM")
 * form: fqm = 2^16 / qm, reading the source matrix transposed.
 * qm entries must be non-zero (asserted). Note that 2^16 / 1 == 65536
 * truncates to 0 when stored in a uint16_t.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            assert(qm[col * len + row]);        /* guard divide-by-zero */
            fqm[row * len + col] = (1 << 16) / qm[col * len + row];
        }
    }
}
6154
6155 static void
6156 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6157                       struct encode_state *encode_state,
6158                       struct intel_encoder_context *encoder_context)
6159 {
6160     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6161     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
6162     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
6163     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6164
6165     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6166         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6167         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6168         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6169         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6170         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6171     } else {
6172         int i;
6173         uint32_t fqm[32];
6174         VAIQMatrixBufferH264 *qm;
6175         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6176         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
6177
6178         for (i = 0; i < 3; i++)
6179             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6180         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6181
6182         for (i = 3; i < 6; i++)
6183             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6184         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6185
6186         gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6187         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6188
6189         gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6190         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6191     }
6192 }
6193
6194 static void
6195 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6196                            struct intel_encoder_context *encoder_context,
6197                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6198                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6199                            int slice_header_indicator,
6200                            struct intel_batchbuffer *batch)
6201 {
6202     if (data_bits_in_last_dw == 0)
6203         data_bits_in_last_dw = 32;
6204
6205     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6206
6207     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6208     OUT_BCS_BATCH(batch,
6209                   (0 << 16) |   /* always start at offset 0 */
6210                   (slice_header_indicator << 14) |
6211                   (data_bits_in_last_dw << 8) |
6212                   (skip_emul_byte_count << 4) |
6213                   (!!emulation_flag << 3) |
6214                   ((!!is_last_header) << 2) |
6215                   ((!!is_end_of_slice) << 1) |
6216                   (0 << 0));    /* check this flag */
6217     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6218
6219     ADVANCE_BCS_BATCH(batch);
6220 }
6221
/* Insert the packed data associated with one slice: first any raw packed
 * buffers attached to the slice (skipping slice-header-typed ones), then
 * the slice header itself — either the app-provided packed slice header,
 * or one built by the driver when none was supplied. The slice header is
 * always inserted last with the last-header flag set.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;

    /* Index 0 means "no packed slice header supplied by the app". */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* Pass 1: insert the raw packed buffers attached to this slice. */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    /* Pass 2: insert the slice header (driver-built or app-provided). */
    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header allocated the buffer; we own it here. */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
6326
6327 static void
6328 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6329                            struct encode_state *encode_state,
6330                            struct intel_encoder_context *encoder_context,
6331                            VAEncSliceParameterBufferH264 *slice_param,
6332                            int slice_index,
6333                            struct intel_batchbuffer *batch)
6334 {
6335     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6336     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
6337     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6338     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6339     unsigned int skip_emul_byte_cnt;
6340
6341     if (slice_index == 0) {
6342         if (encode_state->packed_header_data[idx]) {
6343             VAEncPackedHeaderParameterBuffer *param = NULL;
6344             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6345             unsigned int length_in_bits;
6346
6347             assert(encode_state->packed_header_param[idx]);
6348             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6349             length_in_bits = param->bit_length;
6350
6351             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6352             gen9_mfc_avc_insert_object(ctx,
6353                                        encoder_context,
6354                                        header_data,
6355                                        ALIGN(length_in_bits, 32) >> 5,
6356                                        length_in_bits & 0x1f,
6357                                        skip_emul_byte_cnt,
6358                                        0,
6359                                        0,
6360                                        !param->has_emulation_bytes,
6361                                        0,
6362                                        batch);
6363         }
6364
6365         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6366
6367         if (encode_state->packed_header_data[idx]) {
6368             VAEncPackedHeaderParameterBuffer *param = NULL;
6369             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6370             unsigned int length_in_bits;
6371
6372             assert(encode_state->packed_header_param[idx]);
6373             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6374             length_in_bits = param->bit_length;
6375
6376             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6377
6378             gen9_mfc_avc_insert_object(ctx,
6379                                        encoder_context,
6380                                        header_data,
6381                                        ALIGN(length_in_bits, 32) >> 5,
6382                                        length_in_bits & 0x1f,
6383                                        skip_emul_byte_cnt,
6384                                        0,
6385                                        0,
6386                                        !param->has_emulation_bytes,
6387                                        0,
6388                                        batch);
6389         }
6390
6391         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6392
6393         if (encode_state->packed_header_data[idx]) {
6394             VAEncPackedHeaderParameterBuffer *param = NULL;
6395             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6396             unsigned int length_in_bits;
6397
6398             assert(encode_state->packed_header_param[idx]);
6399             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6400             length_in_bits = param->bit_length;
6401
6402             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6403             gen9_mfc_avc_insert_object(ctx,
6404                                        encoder_context,
6405                                        header_data,
6406                                        ALIGN(length_in_bits, 32) >> 5,
6407                                        length_in_bits & 0x1f,
6408                                        skip_emul_byte_cnt,
6409                                        0,
6410                                        0,
6411                                        !param->has_emulation_bytes,
6412                                        0,
6413                                        batch);
6414         } else if (internal_rate_mode == VA_RC_CBR) {
6415             /* insert others */
6416         }
6417     }
6418
6419     gen9_mfc_avc_insert_slice_packed_data(ctx,
6420                                           encode_state,
6421                                           encoder_context,
6422                                           slice_index,
6423                                           batch);
6424 }
6425
/*
 * Emit the 11-dword MFX_AVC_SLICE_STATE command for one slice.
 *
 * Before programming the command this derives:
 *  - the inter rounding value for P/B frames (explicit application value,
 *    adaptive QP-indexed table when BRC is off, or per-preset default),
 *  - the slice / next-slice start positions in macroblock units,
 *  - the active reference list sizes and the weighted prediction mode,
 *  - the BRC multi-pass controls (rate-control counter, RC panic).
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    /* Slice QP as signalled: picture init QP plus the per-slice delta. */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    //check the inter rounding
    /* Selection order: explicit application value (rounding_inter_p/b/b_ref)
     * wins; otherwise, with adaptive rounding enabled and BRC off, use the
     * QP-indexed adaptive table; otherwise fall back to the preset table. */
    if(generic_state->frame_type == SLICE_TYPE_P)
    {
        if(avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
        {
            if(avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
            {
                /* gop_ref_distance == 1 means no B frames between references. */
                if(generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            }
            else
            {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        }else
        {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    }else if(generic_state->frame_type == SLICE_TYPE_B)
    {
        /* B frames used as references get their own rounding table. */
        if(pic_param->pic_fields.bits.reference_pic_flag)
        {
            if(avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        }
        else
        {
            if(avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
            {
                if(avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            }else
            {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }

    /* MB-grid coordinates of this slice's first macroblock. */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* Last slice: "next slice" position is the bottom of the frame. */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    /* Reference counts, rounding and weighted prediction apply to P/B only. */
    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* Keep the RC counters only on the 2nd and later PAK passes of a BRC
     * encode; the first pass (and CQP) resets them via bit 30 below. */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* RC panic only on the final PAK pass, never with CQP or when explicit
     * min/max QP clamping is in effect. */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                      (!avc_state->min_max_qp_enable) &&
                      (encoder_context->rate_control_mode != VA_RC_CQP) &&
                      (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B)?slice_param->direct_spatial_mv_pred_flag:0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4/DW5: first MB of this slice and of the next slice, in MB units. */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of the compressed bitstream in the output buffer. */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
6619
6620 static uint8_t
6621 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6622 {
6623     unsigned int is_long_term =
6624         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6625     unsigned int is_top_field =
6626         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6627     unsigned int is_bottom_field =
6628         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6629
6630     return ((is_long_term                         << 6) |
6631             (0 << 5) |
6632             (frame_store_id                       << 1) |
6633             ((is_top_field ^ 1) & is_bottom_field));
6634 }
6635
6636 static void
6637 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6638                                  struct encode_state *encode_state,
6639                                  struct intel_encoder_context *encoder_context,
6640                                  VAEncSliceParameterBufferH264 *slice_param,
6641                                  struct intel_batchbuffer *batch)
6642 {
6643     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6644     struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
6645     VAPictureH264 *ref_pic;
6646     int i, slice_type, ref_idx_shift;
6647     unsigned int fwd_ref_entry;
6648     unsigned int bwd_ref_entry;
6649
6650     /* max 4 ref frames are allowed for l0 and l1 */
6651     fwd_ref_entry = 0x80808080;
6652     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6653
6654     if ((slice_type == SLICE_TYPE_P) ||
6655         (slice_type == SLICE_TYPE_B)) {
6656           for (i = 0; i < MIN(avc_state->num_refs[0],4); i++) {
6657               ref_pic = &slice_param->RefPicList0[i];
6658               ref_idx_shift = i * 8;
6659
6660               fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6661               fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
6662           }
6663     }
6664
6665     bwd_ref_entry = 0x80808080;
6666     if (slice_type == SLICE_TYPE_B) {
6667         for (i = 0; i < MIN(avc_state->num_refs[1],4); i++) {
6668             ref_pic = &slice_param->RefPicList1[i];
6669             ref_idx_shift = i * 8;
6670
6671             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6672             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
6673         }
6674     }
6675
6676     if ((slice_type == SLICE_TYPE_P) ||
6677         (slice_type == SLICE_TYPE_B)) {
6678         BEGIN_BCS_BATCH(batch, 10);
6679         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6680         OUT_BCS_BATCH(batch, 0);                        // L0
6681         OUT_BCS_BATCH(batch, fwd_ref_entry);
6682
6683         for (i = 0; i < 7; i++) {
6684             OUT_BCS_BATCH(batch, 0x80808080);
6685         }
6686
6687         ADVANCE_BCS_BATCH(batch);
6688     }
6689
6690     if (slice_type == SLICE_TYPE_B) {
6691         BEGIN_BCS_BATCH(batch, 10);
6692         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6693         OUT_BCS_BATCH(batch, 1);                  //Select L1
6694         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
6695         for(i = 0; i < 7; i++) {
6696             OUT_BCS_BATCH(batch, 0x80808080);
6697         }
6698         ADVANCE_BCS_BATCH(batch);
6699     }
6700 }
6701
6702 static void
6703 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6704                                 struct encode_state *encode_state,
6705                                 struct intel_encoder_context *encoder_context,
6706                                 VAEncPictureParameterBufferH264 *pic_param,
6707                                 VAEncSliceParameterBufferH264 *slice_param,
6708                                 struct intel_batchbuffer *batch)
6709 {
6710     int i, slice_type;
6711     short weightoffsets[32 * 6];
6712
6713     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6714
6715     if (slice_type == SLICE_TYPE_P &&
6716         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6717         memset(weightoffsets,0,32*6 * sizeof(short));
6718         for (i = 0; i < 32; i++) {
6719             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6720             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6721             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6722             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6723             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6724             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6725         }
6726
6727         BEGIN_BCS_BATCH(batch, 98);
6728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6729         OUT_BCS_BATCH(batch, 0);
6730         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6731
6732         ADVANCE_BCS_BATCH(batch);
6733     }
6734
6735     if (slice_type == SLICE_TYPE_B &&
6736         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6737         memset(weightoffsets,0,32*6 * sizeof(short));
6738         for (i = 0; i < 32; i++) {
6739             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6740             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6741             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6742             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6743             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6744             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6745         }
6746
6747         BEGIN_BCS_BATCH(batch, 98);
6748         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6749         OUT_BCS_BATCH(batch, 0);
6750         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6751         ADVANCE_BCS_BATCH(batch);
6752
6753         memset(weightoffsets,0,32*6 * sizeof(short));
6754         for (i = 0; i < 32; i++) {
6755             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6756             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6757             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6758             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6759             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6760             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6761         }
6762
6763         BEGIN_BCS_BATCH(batch, 98);
6764         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6765         OUT_BCS_BATCH(batch, 1);
6766         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6767         ADVANCE_BCS_BATCH(batch);
6768     }
6769 }
6770
/*
 * Build and chain the per-slice PAK commands for one slice.
 *
 * On the first PAK pass, the slice-level commands (ref idx state,
 * weight/offset state, slice state, packed headers) are recorded into the
 * shared second-level batch buffer and the slice's offset in that buffer
 * is remembered; later passes replay the recorded commands from the saved
 * offset instead of re-encoding them.  The slice's MB code produced by the
 * VME stage is then chained in as a second second-level batch.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if(generic_state->curr_pak_pass == 0)
    {
        /* First pass: record this slice's commands and remember where
         * they start inside the second-level batch buffer. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param,slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded slice commands so the second-level batch
         * returns to the primary batch after replay. */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    }else
    {
        /* Repack passes replay the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* Chain the recorded slice-level commands as a second-level batch. */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* Chain this slice's MB code (VME output) as a second-level batch. */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* Offset of the first MB of this slice; presumably 16 dwords (64 bytes)
     * of MB code per macroblock — confirm against the MB code layout. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
6847
6848 static void
6849 gen9_avc_pak_slice_level(VADriverContextP ctx,
6850                          struct encode_state *encode_state,
6851                          struct intel_encoder_context *encoder_context)
6852 {
6853     struct i965_driver_data *i965 = i965_driver_data(ctx);
6854     struct i965_gpe_table *gpe = &i965->gpe_table;
6855     struct intel_batchbuffer *batch = encoder_context->base.batch;
6856     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
6857     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
6858     int i, j;
6859     int slice_index = 0;
6860     int is_frame_level = 1;       /* check it for SKL,now single slice per frame */
6861     int has_tail = 0;             /* check it later */
6862
6863     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
6864         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
6865
6866         if (j == encode_state->num_slice_params_ext - 1)
6867             next_slice_group_param = NULL;
6868         else
6869             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
6870
6871         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
6872             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
6873                 next_slice_param = slice_param + 1;
6874             else
6875                 next_slice_param = next_slice_group_param;
6876
6877             gen9_mfc_avc_single_slice(ctx,
6878                                       encode_state,
6879                                       encoder_context,
6880                                       slice_param,
6881                                       next_slice_param,
6882                                       slice_index);
6883             slice_param++;
6884             slice_index++;
6885
6886             if (is_frame_level)
6887                 break;
6888             else {
6889                 /* remove assert(0) and add other commands here */
6890                 assert(0);
6891             }
6892         }
6893
6894         if (is_frame_level)
6895             break;
6896     }
6897
6898     if (has_tail) {
6899         /* insert a tail if required */
6900     }
6901
6902     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
6903     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
6904     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
6905 }
/*
 * Program the picture-level PAK commands for the current pass: pipe mode,
 * reconstructed/input surface states, buffer base addresses, the AVC image
 * state (replayed from the BRC-updated buffer when BRC is on, freshly
 * generated otherwise), QM/FQM matrices and the direct-mode state.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* BRC repack pass: conditionally end the batch based on the image
         * status mask written by the previous pass.  NOTE(review): this
         * appears intended to skip the repack when the previous pass already
         * met the BRC constraints — confirm against the status-mask writer. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    gen9_mfc_avc_pipe_mode_select(ctx,encode_state,encoder_context);
    /* Surface state binding indices: 0 = reconstructed, 4 = raw input. */
    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_reconstructed_surface),0);
    gen9_mfc_avc_surface_state(ctx,encoder_context,&(generic_ctx->res_uncompressed_input_surface),4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx,encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx,encoder_context);

    if(generic_state->brc_enabled)
    {
        /* BRC: replay the per-pass image state written by the BRC kernel;
         * each pass has its own INTEL_AVC_IMAGE_STATE_CMD_SIZE slot. */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }else
    {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx,encode_state,encoder_context,&(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_fqm_state(ctx,encode_state,encoder_context);
    gen9_mfc_avc_directmode_state(ctx,encoder_context);

}
6968
/*
 * Append MI commands that snapshot the MFX status registers after PAK:
 *  - bitstream byte count and image status mask into the internal status
 *    buffer (read later by the status-report path),
 *  - bitstream byte counts, the executed-pass count and the per-pass image
 *    status control into the BRC pre-PAK statistics buffer.
 * The surrounding MI_FLUSH_DW commands order the register reads against
 * PAK completion and subsequent consumers.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* image status mask — also used as the compare value for the
     * conditional batch-buffer-end of the next repack pass */
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* DW0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW1: frame bitstream byte count without headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW2: number of PAK passes executed so far (curr_pak_pass + 1) */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* DW4+pass: image status control register for this pass */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
7028
7029 static void
7030 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7031                           struct intel_encoder_context *encoder_context)
7032 {
7033     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7034     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
7035     unsigned int rate_control_mode = encoder_context->rate_control_mode;
7036
7037     switch (rate_control_mode & 0x7f) {
7038     case VA_RC_CBR:
7039         generic_state->internal_rate_mode = VA_RC_CBR;
7040         break;
7041
7042     case VA_RC_VBR:
7043         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7044         break;
7045
7046     case VA_RC_CQP:
7047     default:
7048         generic_state->internal_rate_mode = VA_RC_CQP;
7049         break;
7050     }
7051
7052     if (encoder_context->quality_level == 0)\r
7053         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;\r
7054 }
7055
/*
 * Prepare everything the PAK (bitstream packing) stage needs for the
 * current frame: decide whether in-loop deblocking is active, bind the
 * reconstructed / raw input / reference surfaces and their DMV buffers as
 * GPE resources, (re)create the second-level slice batch buffer, and
 * (re)allocate the PAK row-store and MB-status scratch buffers.
 *
 * Returns VA_STATUS_SUCCESS, a surface-check error, or
 * VA_STATUS_ERROR_ALLOCATION_FAILED.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                     struct encode_state *encode_state,
                     struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state * )pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];

    struct gen9_surface_avc *avc_priv_surface;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;
    struct avc_surface_param surface_param;

    /* update the parameter and check slice parameter */
    /* Deblocking counts as enabled as soon as ANY slice does not set
     * disable_deblocking_filter_idc == 1; the scan stops at the first
     * such slice. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param,0,sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
       avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
       /* the last two DMV buffer slots are reserved for the current frame
        * (top/bottom); release any previous binding before re-binding */
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = 0;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = 0;
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2]);
       i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1]);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-2],avc_priv_surface->dmv_top);
       i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS-1],avc_priv_surface->dmv_bottom);
       avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
       avc_priv_surface->frame_store_id = 0;
       avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
       avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
       avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-2] = avc_priv_surface->top_field_order_cnt;
       avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS-1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);


    /* with deblocking the reconstructed surface is the post-deblock output,
     * otherwise PAK writes the pre-deblock output directly */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface,GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface,GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Each reference slot i owns DMV buffers 2i (top) and 2i+1 (bottom);
     * the loop stops at the first empty reference slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2*i] = 0;
        avc_state->top_field_poc[2*i+1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface,GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                obj_surface,encoder_context,
                &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2],avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i*2 + 1],avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2*i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2*i+1] = avc_priv_surface->top_field_order_cnt+1;
        }else
        {
            break;
        }
    }

    /* the 2nd-level batch buffer is rebuilt per frame, sized per slice count */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
    {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0;i < MAX_AVC_SLICE_NUM;i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* PAK scratch buffers, all sized in macroblock units of the frame */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_intra_row_store_scratch_buffer,
                                 size,
                                "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                 size,
                                "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                 size,
                                "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                 &avc_ctx->res_pak_mb_status_buffer,
                                 size,
                                "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
7245
/*
 * Run the PAK stage for one frame: prepare all surfaces/buffers, then
 * execute num_pak_passes picture+slice PAK passes on the BSD ring,
 * reading the MFC status registers after each pass so BRC can adjust
 * between passes.  Updates frame counters on success.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context * )vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* pin the batch to BSD ring 0 when two BSD rings exist, so the MMIO
     * status reads below refer to the ring that executed the PAK */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

         if (generic_state->curr_pak_pass == 0) {
             /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
             struct encoder_status_buffer_internal *status_buffer;

             status_buffer = &(avc_ctx->status_buffer);
             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
             mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
             mi_load_reg_imm.data = 0;
             gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
         }
         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
         gen9_avc_read_mfc_status(ctx, encoder_context);

    }

    /* the 2nd-level slice batch buffer is per-frame; drop it now */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level)
    {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
7306
7307 static VAStatus
7308 gen9_avc_pak_pipeline(VADriverContextP ctx,
7309                       VAProfile profile,
7310                       struct encode_state *encode_state,
7311                       struct intel_encoder_context *encoder_context)
7312 {
7313     VAStatus vaStatus;
7314
7315     switch (profile) {
7316     case VAProfileH264ConstrainedBaseline:
7317     case VAProfileH264Main:
7318     case VAProfileH264High:
7319         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
7320         break;
7321
7322     default:
7323         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
7324         break;
7325     }
7326
7327     return vaStatus;
7328 }
7329
7330 static void
7331 gen9_avc_pak_context_destroy(void * context)
7332 {
7333     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7334     struct generic_encoder_context * generic_ctx;
7335     struct i965_avc_encoder_context * avc_ctx;
7336     int i = 0;
7337
7338     if (!pak_context)
7339         return;
7340
7341     generic_ctx = (struct generic_encoder_context * )pak_context->generic_enc_ctx;
7342     avc_ctx = (struct i965_avc_encoder_context * )pak_context->private_enc_ctx;
7343
7344     // other things
7345     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7346     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7347     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7348     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7349
7350     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7351     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7352     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7353     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7354     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7355
7356     for(i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++)
7357     {
7358         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7359     }
7360
7361     for(i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++)
7362     {
7363         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
7364     }
7365
7366     if (avc_ctx->pres_slice_batch_buffer_2nd_level)
7367     {
7368         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7369         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7370     }
7371
7372 }
7373
7374 static VAStatus
7375 gen9_avc_get_coded_status(VADriverContextP ctx,
7376                           struct intel_encoder_context *encoder_context,
7377                           struct i965_coded_buffer_segment *coded_buf_seg)
7378 {
7379     struct encoder_status *avc_encode_status;
7380
7381     if (!encoder_context || !coded_buf_seg)
7382         return VA_STATUS_ERROR_INVALID_BUFFER;
7383
7384     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7385     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7386
7387     return VA_STATUS_SUCCESS;
7388 }
7389
7390 Bool
7391 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7392 {
7393     /* VME & PAK share the same context */
7394     struct i965_driver_data *i965 = i965_driver_data(ctx);
7395     struct encoder_vme_mfc_context * vme_context = NULL;
7396     struct generic_encoder_context * generic_ctx = NULL;
7397     struct i965_avc_encoder_context * avc_ctx = NULL;
7398     struct generic_enc_codec_state * generic_state = NULL;
7399     struct avc_enc_state * avc_state = NULL;
7400     struct encoder_status_buffer_internal *status_buffer;
7401     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
7402
7403     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7404     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7405     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7406     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7407     avc_state = calloc(1, sizeof(struct avc_enc_state));
7408
7409     if(!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7410         goto allocate_structure_failed;
7411
7412     memset(vme_context,0,sizeof(struct encoder_vme_mfc_context));
7413     memset(generic_ctx,0,sizeof(struct generic_encoder_context));
7414     memset(avc_ctx,0,sizeof(struct i965_avc_encoder_context));
7415     memset(generic_state,0,sizeof(struct generic_enc_codec_state));
7416     memset(avc_state,0,sizeof(struct avc_enc_state));
7417
7418     encoder_context->vme_context = vme_context;
7419     vme_context->generic_enc_ctx = generic_ctx;
7420     vme_context->private_enc_ctx = avc_ctx;
7421     vme_context->generic_enc_state = generic_state;
7422     vme_context->private_enc_state = avc_state;
7423
7424     if (IS_SKL(i965->intel.device_info)||
7425         IS_BXT(i965->intel.device_info)) {
7426         generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7427         generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7428     }
7429     else
7430         goto allocate_structure_failed;
7431
7432     /* initialize misc ? */
7433     avc_ctx->ctx = ctx;
7434     generic_ctx->use_hw_scoreboard = 1;
7435     generic_ctx->use_hw_non_stalling_scoreboard = 1;
7436
7437     /* initialize generic state */
7438
7439     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7440     generic_state->preset = INTEL_PRESET_RT_SPEED;
7441     generic_state->seq_frame_number = 0;
7442     generic_state->total_frame_number = 0;
7443     generic_state->frame_type = 0;
7444     generic_state->first_frame = 1;
7445
7446     generic_state->frame_width_in_pixel = 0;
7447     generic_state->frame_height_in_pixel = 0;
7448     generic_state->frame_width_in_mbs = 0;
7449     generic_state->frame_height_in_mbs = 0;
7450     generic_state->frame_width_4x = 0;
7451     generic_state->frame_height_4x = 0;
7452     generic_state->frame_width_16x = 0;
7453     generic_state->frame_height_16x = 0;
7454     generic_state->frame_width_32x = 0;
7455     generic_state->downscaled_width_4x_in_mb = 0;
7456     generic_state->downscaled_height_4x_in_mb = 0;
7457     generic_state->downscaled_width_16x_in_mb = 0;
7458     generic_state->downscaled_height_16x_in_mb = 0;
7459     generic_state->downscaled_width_32x_in_mb = 0;
7460     generic_state->downscaled_height_32x_in_mb = 0;
7461
7462     generic_state->hme_supported = 1;
7463     generic_state->b16xme_supported = 1;
7464     generic_state->b32xme_supported = 0;
7465     generic_state->hme_enabled = 0;
7466     generic_state->b16xme_enabled = 0;
7467     generic_state->b32xme_enabled = 0;
7468     generic_state->brc_distortion_buffer_supported = 1;
7469     generic_state->brc_constant_buffer_supported = 0;
7470
7471
7472     generic_state->frame_rate = 30;
7473     generic_state->brc_allocated = 0;
7474     generic_state->brc_inited = 0;
7475     generic_state->brc_need_reset = 0;
7476     generic_state->is_low_delay = 0;
7477     generic_state->brc_enabled = 0;//default
7478     generic_state->internal_rate_mode = 0;
7479     generic_state->curr_pak_pass = 0;
7480     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7481     generic_state->is_first_pass = 1;
7482     generic_state->is_last_pass = 0;
7483     generic_state->mb_brc_enabled = 0; // enable mb brc
7484     generic_state->brc_roi_enable = 0;
7485     generic_state->brc_dirty_roi_enable = 0;
7486     generic_state->skip_frame_enbale = 0;
7487
7488     generic_state->target_bit_rate = 0;
7489     generic_state->max_bit_rate = 0;
7490     generic_state->min_bit_rate = 0;
7491     generic_state->init_vbv_buffer_fullness_in_bit = 0;
7492     generic_state->vbv_buffer_size_in_bit = 0;
7493     generic_state->frames_per_100s = 0;
7494     generic_state->gop_size = 0;
7495     generic_state->gop_ref_distance = 0;
7496     generic_state->brc_target_size = 0;
7497     generic_state->brc_mode = 0;
7498     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7499     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7500     generic_state->brc_init_reset_buf_size_in_bits = 0;
7501     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7502     generic_state->frames_per_window_size = 0;//default
7503     generic_state->target_percentage = 0;
7504
7505     generic_state->avbr_curracy = 0;
7506     generic_state->avbr_convergence = 0;
7507
7508     generic_state->num_skip_frames = 0;
7509     generic_state->size_skip_frames = 0;
7510
7511     generic_state->num_roi = 0;
7512     generic_state->max_delta_qp = 0;
7513     generic_state->min_delta_qp = 0;
7514
7515     if (encoder_context->rate_control_mode != VA_RC_NONE &&
7516         encoder_context->rate_control_mode != VA_RC_CQP) {
7517         generic_state->brc_enabled = 1;
7518         generic_state->brc_distortion_buffer_supported = 1;
7519         generic_state->brc_constant_buffer_supported = 1;
7520         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7521     }
7522     /*avc state initialization */
7523     avc_state->mad_enable = 0;
7524     avc_state->mb_disable_skip_map_enable = 0;
7525     avc_state->sfd_enable = 1;//default
7526     avc_state->sfd_mb_enable = 1;//set it true
7527     avc_state->adaptive_search_window_enable = 1;//default
7528     avc_state->mb_qp_data_enable = 0;
7529     avc_state->intra_refresh_i_enable = 0;
7530     avc_state->min_max_qp_enable = 0;
7531     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
7532
7533     //external input
7534     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7535     avc_state->ftq_skip_threshold_lut_input_enable = 0;
7536     avc_state->ftq_override = 0;
7537
7538     avc_state->direct_bias_adjustment_enable = 0;
7539     avc_state->global_motion_bias_adjustment_enable = 0;
7540     avc_state->disable_sub_mb_partion = 0;
7541     avc_state->arbitrary_num_mbs_in_slice = 0;
7542     avc_state->adaptive_transform_decision_enable = 0;//default
7543     avc_state->skip_check_disable = 0;
7544     avc_state->tq_enable = 0;
7545     avc_state->enable_avc_ildb = 0;
7546     avc_state->mbaff_flag = 0;
7547     avc_state->enable_force_skip = 1;//default
7548     avc_state->rc_panic_enable = 1;//default
7549     avc_state->suppress_recon_enable = 1;//default
7550
7551     avc_state->ref_pic_select_list_supported = 1;
7552     avc_state->mb_brc_supported = 1;//?,default
7553     avc_state->multi_pre_enable = 1;//default
7554     avc_state->ftq_enable = 1;//default
7555     avc_state->caf_supported = 1; //default
7556     avc_state->caf_enable = 0;
7557     avc_state->caf_disable_hd = 1;//default
7558     avc_state->skip_bias_adjustment_supported = 1;//default
7559
7560     avc_state->adaptive_intra_scaling_enable = 1;//default
7561     avc_state->old_mode_cost_enable = 0;//default
7562     avc_state->multi_ref_qp_enable = 1;//default
7563     avc_state->weighted_ref_l0_enable = 1;//default
7564     avc_state->weighted_ref_l1_enable = 1;//default
7565     avc_state->weighted_prediction_supported = 0;
7566     avc_state->brc_split_enable = 0;
7567     avc_state->slice_level_report_supported = 0;
7568
7569     avc_state->fbr_bypass_enable = 1;//default
7570     avc_state->field_scaling_output_interleaved = 0;
7571     avc_state->mb_variance_output_enable = 0;
7572     avc_state->mb_pixel_average_output_enable = 0;
7573     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
7574     avc_state->mbenc_curbe_set_in_brc_update = 0;
7575     avc_state->rounding_inter_enable = 1; //default
7576     avc_state->adaptive_rounding_inter_enable = 1;//default
7577
7578     avc_state->mbenc_i_frame_dist_in_use = 0;
7579     avc_state->mb_status_supported = 1; //set in intialization for gen9
7580     avc_state->mb_status_enable = 0;
7581     avc_state->mb_vproc_stats_enable = 0;
7582     avc_state->flatness_check_enable = 0;
7583     avc_state->flatness_check_supported = 1;//default
7584     avc_state->block_based_skip_enable = 0;
7585     avc_state->use_widi_mbenc_kernel = 0;
7586     avc_state->kernel_trellis_enable = 0;
7587     avc_state->generic_reserved = 0;
7588
7589     avc_state->rounding_value = 0;
7590     avc_state->rounding_inter_p = 255;//default
7591     avc_state->rounding_inter_b = 255; //default
7592     avc_state->rounding_inter_b_ref = 255; //default
7593     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7594     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7595     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7596     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7597     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7598     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
7599
7600     memset(avc_state->non_ftq_skip_threshold_lut,0,52*sizeof(uint8_t));
7601     memset(avc_state->ftq_skip_threshold_lut,0,52*sizeof(uint8_t));
7602     memset(avc_state->lamda_value_lut,0,52*2*sizeof(uint8_t));
7603
7604     avc_state->intra_refresh_qp_threshold = 0;
7605     avc_state->trellis_flag = 0;
7606     avc_state->hme_mv_cost_scaling_factor = 0;
7607     avc_state->slice_height = 1;
7608     avc_state->slice_num = 1;
7609     memset(avc_state->dist_scale_factor_list0,0,32*sizeof(uint32_t));
7610     avc_state->bi_weight = 0;
7611     avc_state->brc_const_data_surface_width = 64;
7612     avc_state->brc_const_data_surface_height = 44;
7613
7614     avc_state->num_refs[0] = 0;
7615     avc_state->num_refs[1] = 0;
7616     memset(avc_state->list_ref_idx,0,32*2*sizeof(uint32_t));
7617     memset(avc_state->top_field_poc,0,NUM_MFC_AVC_DMV_BUFFERS*sizeof(int32_t));
7618     avc_state->tq_rounding = 0;
7619     avc_state->zero_mv_threshold = 0;
7620     avc_state->slice_second_levle_batch_buffer_in_use = 0;
7621
7622     //1. seq/pic/slice
7623
7624     /* the definition of status buffer offset for Encoder */
7625
7626     status_buffer = &avc_ctx->status_buffer;
7627     memset(status_buffer, 0,sizeof(struct encoder_status_buffer_internal));
7628
7629     status_buffer->base_offset = base_offset;
7630     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7631     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7632     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7633     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7634     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7635     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
7636
7637     status_buffer->status_buffer_size = sizeof(struct encoder_status);
7638     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7639     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7640     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7641     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7642     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
7643
7644     gen9_avc_kernel_init(ctx,encoder_context);
7645     encoder_context->vme_context = vme_context;
7646     encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7647     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
7648
7649     return true;
7650
7651 allocate_structure_failed:
7652
7653     free(vme_context);
7654     free(generic_ctx);
7655     free(avc_ctx);
7656     free(generic_state);
7657     free(avc_state);
7658     return false;
7659 }
7660
7661 Bool
7662 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7663 {
7664     /* VME & PAK share the same context */
7665     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7666
7667     if (!pak_context)
7668         return false;
7669
7670     encoder_context->mfc_context = pak_context;
7671     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7672     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7673     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7674     encoder_context->get_status = gen9_avc_get_coded_status;
7675     return true;
7676 }