OSDN Git Service

ENC: enable avc encoder on cnl
[android-x86/hardware-intel-common-vaapi.git] / src / i965_avc_encoder.c
1 /*
2  * Copyright (c) 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *    Sreerenj Balachandran <sreerenj.balachandran@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
47
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
54
55 #define MAX_URB_SIZE                    4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT     1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
58 #define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 (= 1 << 4) */
59
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
61         if (bo) {                                                       \
62             OUT_BCS_RELOC64(batch,                                        \
63                             bo,                                         \
64                             I915_GEM_DOMAIN_INSTRUCTION,                \
65                             is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
66                             delta);                                     \
67         } else {                                                        \
68             OUT_BCS_BATCH(batch, 0);                                    \
69             OUT_BCS_BATCH(batch, 0);                                    \
70         }                                                               \
71     } while (0)
72
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
74         OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
75         OUT_BCS_BATCH(batch, attr);                             \
76     } while (0)
77
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE      64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE      128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE   16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE   48
84 #define FEI_AVC_QP_BUFFER_SIZE           1
85 #define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
86
87 #define SCALE_CUR_PIC        1
88 #define SCALE_PAST_REF_PIC   2
89 #define SCALE_FUTURE_REF_PIC 3
90
91 static const uint32_t qm_flat[16] = {
92     0x10101010, 0x10101010, 0x10101010, 0x10101010,
93     0x10101010, 0x10101010, 0x10101010, 0x10101010,
94     0x10101010, 0x10101010, 0x10101010, 0x10101010,
95     0x10101010, 0x10101010, 0x10101010, 0x10101010
96 };
97
98 static const uint32_t fqm_flat[32] = {
99     0x10001000, 0x10001000, 0x10001000, 0x10001000,
100     0x10001000, 0x10001000, 0x10001000, 0x10001000,
101     0x10001000, 0x10001000, 0x10001000, 0x10001000,
102     0x10001000, 0x10001000, 0x10001000, 0x10001000,
103     0x10001000, 0x10001000, 0x10001000, 0x10001000,
104     0x10001000, 0x10001000, 0x10001000, 0x10001000,
105     0x10001000, 0x10001000, 0x10001000, 0x10001000,
106     0x10001000, 0x10001000, 0x10001000, 0x10001000
107 };
108
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
110
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
112     // unsigned int 0
113     {
114         0
115     },
116
117     // unsigned int 1
118     {
119         0
120     },
121
122     // unsigned int 2
123     {
124         0
125     },
126
127     // unsigned int 3
128     {
129         0
130     },
131
132     // unsigned int 4
133     {
134         0
135     },
136
137     // unsigned int 5
138     {
139         0
140     },
141
142     // unsigned int 6
143     {
144         0
145     },
146
147     // unsigned int 7
148     {
149         0
150     },
151
152     // unsigned int 8
153     {
154         0,
155         0
156     },
157
158     // unsigned int 9
159     {
160         0,
161         0
162     },
163
164     // unsigned int 10
165     {
166         0,
167         0
168     },
169
170     // unsigned int 11
171     {
172         0,
173         1
174     },
175
176     // unsigned int 12
177     {
178         51,
179         0
180     },
181
182     // unsigned int 13
183     {
184         40,
185         60,
186         80,
187         120
188     },
189
190     // unsigned int 14
191     {
192         35,
193         60,
194         80,
195         120
196     },
197
198     // unsigned int 15
199     {
200         40,
201         60,
202         90,
203         115
204     },
205
206     // unsigned int 16
207     {
208         0,
209         0,
210         0,
211         0
212     },
213
214     // unsigned int 17
215     {
216         0,
217         0,
218         0,
219         0
220     },
221
222     // unsigned int 18
223     {
224         0,
225         0,
226         0,
227         0
228     },
229
230     // unsigned int 19
231     {
232         0,
233         0,
234         0,
235         0
236     },
237
238     // unsigned int 20
239     {
240         0,
241         0,
242         0,
243         0
244     },
245
246     // unsigned int 21
247     {
248         0,
249         0,
250         0,
251         0
252     },
253
254     // unsigned int 22
255     {
256         0,
257         0,
258         0,
259         0
260     },
261
262     // unsigned int 23
263     {
264         0
265     }
266 };
267
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
269     //unsigned int 0
270     {
271         0
272     },
273
274     //unsigned int 1
275     {
276         0
277     },
278
279     //unsigned int 2
280     {
281         0
282     },
283
284     //unsigned int 3
285     {
286
287         10,
288         50
289
290     },
291
292     //unsigned int 4
293     {
294
295         100,
296         150
297
298     },
299
300     //unsigned int 5
301     {
302         0, 0, 0, 0
303     },
304
305     //unsigned int 6
306     {
307         0, 0, 0, 0
308     },
309
310     //unsigned int 7
311     {
312         0
313     },
314
315     //unsigned int 8
316     {
317
318         1,
319         1,
320         3,
321         2
322
323     },
324
325     //unsigned int 9
326     {
327
328         1,
329         40,
330         5,
331         5
332
333     },
334
335     //unsigned int 10
336     {
337
338         3,
339         1,
340         7,
341         18
342
343     },
344
345     //unsigned int 11
346     {
347
348         25,
349         37,
350         40,
351         75
352
353     },
354
355     //unsigned int 12
356     {
357
358         97,
359         103,
360         125,
361         160
362
363     },
364
365     //unsigned int 13
366     {
367
368         -3,
369         -2,
370         -1,
371         0
372
373     },
374
375     //unsigned int 14
376     {
377
378         1,
379         2,
380         3,
381         0xff
382
383     },
384
385     //unsigned int 15
386     {
387         0, 0
388     },
389
390     //unsigned int 16
391     {
392         0, 0
393     },
394
395     //unsigned int 17
396     {
397         0, 0
398     },
399 };
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
401     // unsigned int 0
402     {
403         0
404     },
405
406     // unsigned int 1
407     {
408         0
409     },
410
411     // unsigned int 2
412     {
413         0
414     },
415
416     // unsigned int 3
417     {
418         10,
419         50
420     },
421
422     // unsigned int 4
423     {
424         100,
425         150
426     },
427
428     // unsigned int 5
429     {
430         0,
431         0,
432         0,
433         0
434     },
435
436     // unsigned int 6
437     {
438         0,
439         0,
440         0,
441         0,
442         0,
443         0
444     },
445
446     // unsigned int 7
447     {
448         0
449     },
450
451     // unsigned int 8
452     {
453         1,
454         1,
455         3,
456         2
457     },
458
459     // unsigned int 9
460     {
461         1,
462         40,
463         5,
464         5
465     },
466
467     // unsigned int 10
468     {
469         3,
470         1,
471         7,
472         18
473     },
474
475     // unsigned int 11
476     {
477         25,
478         37,
479         40,
480         75
481     },
482
483     // unsigned int 12
484     {
485         97,
486         103,
487         125,
488         160
489     },
490
491     // unsigned int 13
492     {
493         -3,
494         -2,
495         -1,
496         0
497     },
498
499     // unsigned int 14
500     {
501         1,
502         2,
503         3,
504         0xff
505     },
506
507     // unsigned int 15
508     {
509         0,
510         0,
511         0,
512         0
513     },
514
515     // unsigned int 16
516     {
517         0
518     },
519
520     // unsigned int 17
521     {
522         0
523     },
524
525     // unsigned int 18
526     {
527         0
528     },
529
530     // unsigned int 19
531     {
532         0
533     },
534
535     // unsigned int 20
536     {
537         0
538     },
539
540     // unsigned int 21
541     {
542         0
543     },
544
545     // unsigned int 22
546     {
547         0
548     },
549
550     // unsigned int 23
551     {
552         0
553     },
554
555 };
556
557 static void
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559                                 struct encode_state *encode_state,
560                                 struct intel_encoder_context *encoder_context)
561 {
562     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
564     int i;
565
566     /* brc */
567     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
568
569     generic_state->brc_need_reset = encoder_context->brc.need_reset;
570
571     if (generic_state->internal_rate_mode == VA_RC_CBR) {
572         generic_state->min_bit_rate = generic_state->max_bit_rate;
573         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
574
575         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576             generic_state->target_bit_rate = generic_state->max_bit_rate;
577             generic_state->brc_need_reset = 1;
578         }
579     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
582
583         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585             generic_state->brc_need_reset = 1;
586         }
587     }
588
589     /*  frame rate */
590     if (generic_state->internal_rate_mode != VA_RC_CQP) {
591         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
594     } else {
595         generic_state->frames_per_100s = 30 * 100;
596         generic_state->frame_rate = 30 ;
597         generic_state->frames_per_window_size = 30;
598     }
599
600     /*  HRD */
601     if (generic_state->internal_rate_mode != VA_RC_CQP) {
602         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
604     }
605
606     /* ROI */
607     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608     if (generic_state->num_roi > 0) {
609         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
611
612         for (i = 0; i < generic_state->num_roi; i++) {
613             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
614             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
615             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
616             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
618
619             generic_state->roi[i].left /= 16;
620             generic_state->roi[i].right /= 16;
621             generic_state->roi[i].top /= 16;
622             generic_state->roi[i].bottom /= 16;
623         }
624     }
625
626 }
627
628 static bool
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
630                                      int binary_size,
631                                      INTEL_GENERIC_ENC_OPERATION operation,
632                                      int krnstate_idx,
633                                      struct i965_kernel *ret_kernel)
634 {
635     typedef uint32_t BIN_PTR[4];
636
637     char *bin_start;
638     gen9_avc_encoder_kernel_header      *pkh_table;
639     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
640     int next_krnoffset;
641
642     if (!pvbinary || !ret_kernel)
643         return false;
644
645     bin_start = (char *)pvbinary;
646     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647     pinvalid_entry = &(pkh_table->static_detection) + 1;
648     next_krnoffset = binary_size;
649
650     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651         pcurr_header = &pkh_table->ply_dscale_ply;
652     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653         pcurr_header = &pkh_table->ply_2xdscale_ply;
654     } else if (operation == INTEL_GENERIC_ENC_ME) {
655         pcurr_header = &pkh_table->me_p;
656     } else if (operation == INTEL_GENERIC_ENC_BRC) {
657         pcurr_header = &pkh_table->frame_brc_init;
658     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659         pcurr_header = &pkh_table->mbenc_quality_I;
660     } else if (operation == INTEL_GENERIC_ENC_WP) {
661         pcurr_header = &pkh_table->wp;
662     } else if (operation == INTEL_GENERIC_ENC_SFD) {
663         pcurr_header = &pkh_table->static_detection;
664     } else {
665         return false;
666     }
667
668     pcurr_header += krnstate_idx;
669     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
670
671     pnext_header = (pcurr_header + 1);
672     if (pnext_header < pinvalid_entry) {
673         next_krnoffset = pnext_header->kernel_start_pointer << 6;
674     }
675     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
676
677     return true;
678 }
679
680 static bool
681 intel_avc_fei_get_kernel_header_and_size(
682     void                             *pvbinary,
683     int                              binary_size,
684     INTEL_GENERIC_ENC_OPERATION      operation,
685     int                              krnstate_idx,
686     struct i965_kernel               *ret_kernel)
687 {
688     typedef uint32_t BIN_PTR[4];
689
690     char *bin_start;
691     gen9_avc_fei_encoder_kernel_header      *pkh_table;
692     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
693     int next_krnoffset;
694
695     if (!pvbinary || !ret_kernel)
696         return false;
697
698     bin_start = (char *)pvbinary;
699     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700     pinvalid_entry = &(pkh_table->wp) + 1;
701     next_krnoffset = binary_size;
702
703     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704         pcurr_header = &pkh_table->ply_dscale_ply;
705     } else if (operation == INTEL_GENERIC_ENC_ME) {
706         pcurr_header = &pkh_table->me_p;
707     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708         pcurr_header = &pkh_table->mbenc_i;
709     } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710         pcurr_header =  &pkh_table->preproc;
711     } else {
712         return false;
713     }
714
715     pcurr_header += krnstate_idx;
716     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
717
718     pnext_header = (pcurr_header + 1);
719     if (pnext_header < pinvalid_entry) {
720         next_krnoffset = pnext_header->kernel_start_pointer << 6;
721     }
722     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
723
724     return true;
725 }
726
727 static void
728 gen9_free_surfaces_avc(void **data)
729 {
730     struct gen9_surface_avc *avc_surface;
731
732     if (!data || !*data)
733         return;
734
735     avc_surface = *data;
736
737     if (avc_surface->scaled_4x_surface_obj) {
738         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740         avc_surface->scaled_4x_surface_obj = NULL;
741     }
742
743     if (avc_surface->scaled_16x_surface_obj) {
744         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746         avc_surface->scaled_16x_surface_obj = NULL;
747     }
748
749     if (avc_surface->scaled_32x_surface_obj) {
750         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752         avc_surface->scaled_32x_surface_obj = NULL;
753     }
754
755     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
758
759     /* FEI specific resources */
760     /* since the driver previously taken an extra reference to the drm_bo
761      * in case the buffers were supplied by middleware, there shouldn't
762      * be any memory handling issue */
763     i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764     i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765     i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766     i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
767
768     dri_bo_unreference(avc_surface->dmv_top);
769     avc_surface->dmv_top = NULL;
770     dri_bo_unreference(avc_surface->dmv_bottom);
771     avc_surface->dmv_bottom = NULL;
772
773     free(avc_surface);
774
775     *data = NULL;
776
777     return;
778 }
779
780 static VAStatus
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782                              struct object_surface *obj_surface,
783                              struct intel_encoder_context *encoder_context,
784                              struct avc_surface_param *surface_param)
785 {
786     struct i965_driver_data *i965 = i965_driver_data(ctx);
787     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
790
791     struct gen9_surface_avc *avc_surface;
792     int downscaled_width_4x, downscaled_height_4x;
793     int downscaled_width_16x, downscaled_height_16x;
794     int downscaled_width_32x, downscaled_height_32x;
795     int size = 0;
796     unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797     unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798     unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799     int allocate_flag = 1;
800     int width, height;
801
802     if (!obj_surface || !obj_surface->bo)
803         return VA_STATUS_ERROR_INVALID_SURFACE;
804
805     if (obj_surface->private_data) {
806         return VA_STATUS_SUCCESS;
807     }
808
809     avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
810
811     if (!avc_surface)
812         return VA_STATUS_ERROR_ALLOCATION_FAILED;
813
814     avc_surface->ctx = ctx;
815     obj_surface->private_data = avc_surface;
816     obj_surface->free_private_data = gen9_free_surfaces_avc;
817
818     downscaled_width_4x = generic_state->frame_width_4x;
819     downscaled_height_4x = generic_state->frame_height_4x;
820
821     i965_CreateSurfaces(ctx,
822                         downscaled_width_4x,
823                         downscaled_height_4x,
824                         VA_RT_FORMAT_YUV420,
825                         1,
826                         &avc_surface->scaled_4x_surface_id);
827
828     avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
829
830     if (!avc_surface->scaled_4x_surface_obj) {
831         return VA_STATUS_ERROR_ALLOCATION_FAILED;
832     }
833
834     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
836
837     downscaled_width_16x = generic_state->frame_width_16x;
838     downscaled_height_16x = generic_state->frame_height_16x;
839     i965_CreateSurfaces(ctx,
840                         downscaled_width_16x,
841                         downscaled_height_16x,
842                         VA_RT_FORMAT_YUV420,
843                         1,
844                         &avc_surface->scaled_16x_surface_id);
845     avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
846
847     if (!avc_surface->scaled_16x_surface_obj) {
848         return VA_STATUS_ERROR_ALLOCATION_FAILED;
849     }
850
851     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
853
854     if (generic_state->b32xme_supported ||
855         generic_state->b32xme_enabled) {
856         downscaled_width_32x = generic_state->frame_width_32x;
857         downscaled_height_32x = generic_state->frame_height_32x;
858         i965_CreateSurfaces(ctx,
859                             downscaled_width_32x,
860                             downscaled_height_32x,
861                             VA_RT_FORMAT_YUV420,
862                             1,
863                             &avc_surface->scaled_32x_surface_id);
864         avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
865
866         if (!avc_surface->scaled_32x_surface_obj) {
867             return VA_STATUS_ERROR_ALLOCATION_FAILED;
868         }
869
870         i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
872     }
873
874     /*mb code and mv data for each frame*/
875     if (!encoder_context->fei_enabled) {
876         size = frame_mb_nums * 16 * 4;
877         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878                                                    &avc_surface->res_mb_code_surface,
879                                                    ALIGN(size, 0x1000),
880                                                    "mb code buffer");
881         if (!allocate_flag)
882             goto failed_allocation;
883
884         size = frame_mb_nums * 32 * 4;
885         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886                                                    &avc_surface->res_mv_data_surface,
887                                                    ALIGN(size, 0x1000),
888                                                    "mv data buffer");
889         if (!allocate_flag)
890             goto failed_allocation;
891     }
892
893     /* ref pic list*/
894     if (avc_state->ref_pic_select_list_supported) {
895         width = ALIGN(frame_width_in_mbs * 8, 64);
896         height = frame_height_in_mbs ;
897         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898                                                       &avc_surface->res_ref_pic_select_surface,
899                                                       width, height,
900                                                       width,
901                                                       "Ref pic select list buffer");
902         if (!allocate_flag)
903             goto failed_allocation;
904     }
905
906     /*direct mv*/
907     avc_surface->dmv_top =
908         dri_bo_alloc(i965->intel.bufmgr,
909                      "direct mv top Buffer",
910                      68 * frame_mb_nums,
911                      64);
912     avc_surface->dmv_bottom =
913         dri_bo_alloc(i965->intel.bufmgr,
914                      "direct mv bottom Buffer",
915                      68 * frame_mb_nums,
916                      64);
917     assert(avc_surface->dmv_top);
918     assert(avc_surface->dmv_bottom);
919
920     return VA_STATUS_SUCCESS;
921
922 failed_allocation:
923     return VA_STATUS_ERROR_ALLOCATION_FAILED;
924 }
925
926 static void
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928                             struct encode_state *encode_state,
929                             struct intel_encoder_context *encoder_context)
930 {
931     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
935
936     struct i965_gpe_resource *gpe_resource = NULL;
937     VAEncSliceParameterBufferH264 * slice_param = NULL;
938     unsigned int * data = NULL;
939     unsigned int * data_row = NULL;
940     int i, j, count = 0;
941     unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
942
943     if (!avc_state->arbitrary_num_mbs_in_slice)
944         return;
945
946     gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947     assert(gpe_resource);
948
949     i965_zero_gpe_resource(gpe_resource);
950
951     data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
952     assert(data_row);
953
954     data = data_row;
955     for (i = 0; i < avc_state->slice_num; i++) {
956         slice_param = avc_state->slice_param[i];
957         for (j = 0; j < slice_param->num_macroblocks; j++) {
958             *data++ = i;
959             if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
960                 data_row += pitch;
961                 data = data_row;
962                 *data++ = i;
963             }
964             count++;
965         }
966     }
967     *data++ = 0xFFFFFFFF;
968
969     i965_unmap_gpe_resource(gpe_resource);
970 }
971
972 static VAStatus
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974                             struct encode_state *encode_state,
975                             struct intel_encoder_context *encoder_context)
976 {
977     struct i965_driver_data *i965 = i965_driver_data(ctx);
978     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982     unsigned int size  = 0;
983     unsigned int width  = 0;
984     unsigned int height  = 0;
985     unsigned char * data  = NULL;
986     int allocate_flag = 1;
987     int i = 0;
988
989     /*all the surface/buffer are allocated here*/
990
991     /*second level batch buffer for image state write when cqp etc*/
992     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
996                                                ALIGN(size, 0x1000),
997                                                "second levle batch (image state write) buffer");
998     if (!allocate_flag)
999         goto failed_allocation;
1000
1001     /* scaling related surface   */
1002     if (avc_state->mb_status_supported) {
1003         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006                                                    &avc_ctx->res_mb_status_buffer,
1007                                                    ALIGN(size, 0x1000),
1008                                                    "MB statistics output buffer");
1009         if (!allocate_flag)
1010             goto failed_allocation;
1011         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1012     }
1013
1014     if (avc_state->flatness_check_supported) {
1015         width = generic_state->frame_width_in_mbs * 4;
1016         height = generic_state->frame_height_in_mbs * 4;
1017         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019                                                       &avc_ctx->res_flatness_check_surface,
1020                                                       width, height,
1021                                                       ALIGN(width, 64),
1022                                                       "Flatness check buffer");
1023         if (!allocate_flag)
1024             goto failed_allocation;
1025     }
1026     /* me related surface */
1027     width = generic_state->downscaled_width_4x_in_mb * 8;
1028     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031                                                   &avc_ctx->s4x_memv_distortion_buffer,
1032                                                   width, height,
1033                                                   ALIGN(width, 64),
1034                                                   "4x MEMV distortion buffer");
1035     if (!allocate_flag)
1036         goto failed_allocation;
1037     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1038
1039     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1044                                                   width, height,
1045                                                   width,
1046                                                   "4x MEMV min distortion brc buffer");
1047     if (!allocate_flag)
1048         goto failed_allocation;
1049     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1050
1051
1052     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056                                                   &avc_ctx->s4x_memv_data_buffer,
1057                                                   width, height,
1058                                                   width,
1059                                                   "4x MEMV data buffer");
1060     if (!allocate_flag)
1061         goto failed_allocation;
1062     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1063
1064
1065     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069                                                   &avc_ctx->s16x_memv_data_buffer,
1070                                                   width, height,
1071                                                   width,
1072                                                   "16x MEMV data buffer");
1073     if (!allocate_flag)
1074         goto failed_allocation;
1075     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1076
1077
1078     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                                   &avc_ctx->s32x_memv_data_buffer,
1083                                                   width, height,
1084                                                   width,
1085                                                   "32x MEMV data buffer");
1086     if (!allocate_flag)
1087         goto failed_allocation;
1088     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1089
1090
1091     if (!generic_state->brc_allocated) {
1092         /*brc related surface */
1093         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1094         size = 864;
1095         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096                                                    &avc_ctx->res_brc_history_buffer,
1097                                                    ALIGN(size, 0x1000),
1098                                                    "brc history buffer");
1099         if (!allocate_flag)
1100             goto failed_allocation;
1101
1102         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1103         size = 64;//44
1104         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106                                                    ALIGN(size, 0x1000),
1107                                                    "brc pak statistic buffer");
1108         if (!allocate_flag)
1109             goto failed_allocation;
1110
1111         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114                                                    &avc_ctx->res_brc_image_state_read_buffer,
1115                                                    ALIGN(size, 0x1000),
1116                                                    "brc image state read buffer");
1117         if (!allocate_flag)
1118             goto failed_allocation;
1119
1120         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123                                                    &avc_ctx->res_brc_image_state_write_buffer,
1124                                                    ALIGN(size, 0x1000),
1125                                                    "brc image state write buffer");
1126         if (!allocate_flag)
1127             goto failed_allocation;
1128
1129         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130         height = avc_state->brc_const_data_surface_height;
1131         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133                                                       &avc_ctx->res_brc_const_data_buffer,
1134                                                       width, height,
1135                                                       width,
1136                                                       "brc const data buffer");
1137         if (!allocate_flag)
1138             goto failed_allocation;
1139         i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140
1141         if (generic_state->brc_distortion_buffer_supported) {
1142             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                           &avc_ctx->res_brc_dist_data_surface,
1149                                                           width, height,
1150                                                           width,
1151                                                           "brc dist data buffer");
1152             if (!allocate_flag)
1153                 goto failed_allocation;
1154             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1155         }
1156
1157         if (generic_state->brc_roi_enable) {
1158             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162                                                           &avc_ctx->res_mbbrc_roi_surface,
1163                                                           width, height,
1164                                                           width,
1165                                                           "mbbrc roi buffer");
1166             if (!allocate_flag)
1167                 goto failed_allocation;
1168             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1169         }
1170
1171         /*mb qp in mb brc*/
1172         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
1177                                                       width, height,
1178                                                       width,
1179                                                       "mbbrc mb qp buffer");
1180         if (!allocate_flag)
1181             goto failed_allocation;
1182
1183         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184         size = 16 * AVC_QP_MAX * 4;
1185         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186                                                    &avc_ctx->res_mbbrc_const_data_buffer,
1187                                                    ALIGN(size, 0x1000),
1188                                                    "mbbrc const data buffer");
1189         if (!allocate_flag)
1190             goto failed_allocation;
1191
1192         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194             size = avc_state->mbenc_brc_buffer_size;
1195             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196                                                        &avc_ctx->res_mbenc_brc_buffer,
1197                                                        ALIGN(size, 0x1000),
1198                                                        "mbenc brc buffer");
1199             if (!allocate_flag)
1200                 goto failed_allocation;
1201             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1202         }
1203         generic_state->brc_allocated = 1;
1204     }
1205
1206     /*mb qp external*/
1207     if (avc_state->mb_qp_data_enable) {
1208         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212                                                       &avc_ctx->res_mb_qp_data_surface,
1213                                                       width, height,
1214                                                       width,
1215                                                       "external mb qp buffer");
1216         if (!allocate_flag)
1217             goto failed_allocation;
1218     }
1219
1220     /*     mbenc related surface. it share most of surface with other kernels     */
1221     if (avc_state->arbitrary_num_mbs_in_slice) {
1222         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223         height = generic_state->frame_height_in_mbs ;
1224         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226                                                       &avc_ctx->res_mbenc_slice_map_surface,
1227                                                       width, height,
1228                                                       width,
1229                                                       "slice map buffer");
1230         if (!allocate_flag)
1231             goto failed_allocation;
1232         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1233
1234         /*generate slice map,default one slice per frame.*/
1235     }
1236
1237     /* sfd related surface  */
1238     if (avc_state->sfd_enable) {
1239         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1240         size = 128;
1241         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242                                                    &avc_ctx->res_sfd_output_buffer,
1243                                                    size,
1244                                                    "sfd output buffer");
1245         if (!allocate_flag)
1246             goto failed_allocation;
1247         i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1248
1249         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250         size = ALIGN(52, 64);
1251         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1253                                                    size,
1254                                                    "sfd P frame cost table buffer");
1255         if (!allocate_flag)
1256             goto failed_allocation;
1257         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1258         assert(data);
1259         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1261
1262         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263         size = ALIGN(52, 64);
1264         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1266                                                    size,
1267                                                    "sfd B frame cost table buffer");
1268         if (!allocate_flag)
1269             goto failed_allocation;
1270         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1271         assert(data);
1272         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1274     }
1275
1276     /* wp related surfaces */
1277     if (avc_state->weighted_prediction_supported) {
1278         for (i = 0; i < 2 ; i++) {
1279             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1280                 continue;
1281             }
1282
1283             width = generic_state->frame_width_in_pixel;
1284             height = generic_state->frame_height_in_pixel ;
1285             i965_CreateSurfaces(ctx,
1286                                 width,
1287                                 height,
1288                                 VA_RT_FORMAT_YUV420,
1289                                 1,
1290                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1292
1293             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294                 goto failed_allocation;
1295             }
1296
1297             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1299         }
1300         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1304     }
1305
1306     /* other   */
1307
1308     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1309     size = 4 * 1;
1310     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311                                                &avc_ctx->res_mad_data_buffer,
1312                                                ALIGN(size, 0x1000),
1313                                                "MAD data buffer");
1314     if (!allocate_flag)
1315         goto failed_allocation;
1316
1317     return VA_STATUS_SUCCESS;
1318
1319 failed_allocation:
1320     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1321 }
1322
1323 static void
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1325 {
1326     if (!vme_context)
1327         return;
1328
1329     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330     VADriverContextP ctx = avc_ctx->ctx;
1331     int i = 0;
1332
1333     /* free all the surface/buffer here*/
1334     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1360
1361     for (i = 0; i < 2 ; i++) {
1362         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1366         }
1367     }
1368
1369     /* free preenc resources */
1370     i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371     i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372     i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373     i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1374
1375     i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376     i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1377
1378     i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379     avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380     avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1381
1382     i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383     avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384     avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1385
1386     i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387     avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388     avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1389 }
1390
1391 static void
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393                                  struct intel_encoder_context *encoder_context,
1394                                  struct i965_gpe_context *gpe_context,
1395                                  int media_function,
1396                                  struct gpe_media_object_parameter *param)
1397 {
1398     struct i965_driver_data *i965 = i965_driver_data(ctx);
1399     struct i965_gpe_table *gpe = &i965->gpe_table;
1400     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1402
1403     struct intel_batchbuffer *batch = encoder_context->base.batch;
1404     struct encoder_status_buffer_internal *status_buffer;
1405     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1406
1407     if (!batch)
1408         return;
1409
1410     intel_batchbuffer_start_atomic(batch, 0x1000);
1411     intel_batchbuffer_emit_mi_flush(batch);
1412
1413     status_buffer = &(avc_ctx->status_buffer);
1414     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415     mi_store_data_imm.bo = status_buffer->bo;
1416     mi_store_data_imm.offset = status_buffer->media_index_offset;
1417     mi_store_data_imm.dw0 = media_function;
1418     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1419
1420     gpe->pipeline_setup(ctx, gpe_context, batch);
1421     gpe->media_object(ctx, gpe_context, batch, param);
1422     gpe->media_state_flush(ctx, gpe_context, batch);
1423
1424     gpe->pipeline_end(ctx, gpe_context, batch);
1425
1426     intel_batchbuffer_end_atomic(batch);
1427
1428     intel_batchbuffer_flush(batch);
1429 }
1430
1431 static void
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433                                         struct intel_encoder_context *encoder_context,
1434                                         struct i965_gpe_context *gpe_context,
1435                                         int media_function,
1436                                         struct gpe_media_object_walker_parameter *param)
1437 {
1438     struct i965_driver_data *i965 = i965_driver_data(ctx);
1439     struct i965_gpe_table *gpe = &i965->gpe_table;
1440     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1442
1443     struct intel_batchbuffer *batch = encoder_context->base.batch;
1444     struct encoder_status_buffer_internal *status_buffer;
1445     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1446
1447     if (!batch)
1448         return;
1449
1450     intel_batchbuffer_start_atomic(batch, 0x1000);
1451
1452     intel_batchbuffer_emit_mi_flush(batch);
1453
1454     status_buffer = &(avc_ctx->status_buffer);
1455     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456     mi_store_data_imm.bo = status_buffer->bo;
1457     mi_store_data_imm.offset = status_buffer->media_index_offset;
1458     mi_store_data_imm.dw0 = media_function;
1459     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1460
1461     gpe->pipeline_setup(ctx, gpe_context, batch);
1462     gpe->media_object_walker(ctx, gpe_context, batch, param);
1463     gpe->media_state_flush(ctx, gpe_context, batch);
1464
1465     gpe->pipeline_end(ctx, gpe_context, batch);
1466
1467     intel_batchbuffer_end_atomic(batch);
1468
1469     intel_batchbuffer_flush(batch);
1470 }
1471
1472 static void
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474                           struct i965_gpe_context *gpe_context,
1475                           struct encoder_kernel_parameter *kernel_param)
1476 {
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478
1479     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1480
1481     gpe_context->sampler.entry_size = 0;
1482     gpe_context->sampler.max_entries = 0;
1483
1484     if (kernel_param->sampler_size) {
1485         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486         gpe_context->sampler.max_entries = 1;
1487     }
1488
1489     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1491
1492     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1496
1497     if (i965->intel.eu_total > 0)
1498         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1499     else
1500         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1501
1502     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505                                               gpe_context->vfe_state.curbe_allocation_size -
1506                                               ((gpe_context->idrt.entry_size >> 5) *
1507                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509     gpe_context->vfe_state.gpgpu_mode = 0;
1510 }
1511
1512 static void
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514                              struct encoder_scoreboard_parameter *scoreboard_param)
1515 {
1516     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1519
1520     if (scoreboard_param->walkpat_flag) {
1521         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522         gpe_context->vfe_desc5.scoreboard0.type = 1;
1523
1524         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1525         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1526
1527         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1528         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1529
1530         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1531         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1532
1533         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1534         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1535     } else {
1536         // Scoreboard 0
1537         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1538         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1539
1540         // Scoreboard 1
1541         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1542         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1543
1544         // Scoreboard 2
1545         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1546         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1547
1548         // Scoreboard 3
1549         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1550         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1551
1552         // Scoreboard 4
1553         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1554         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1555
1556         // Scoreboard 5
1557         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1558         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1559
1560         // Scoreboard 6
1561         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1562         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1563
1564         // Scoreboard 7
1565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1567     }
1568 }
1569 /*
1570 VME pipeline related function
1571 */
1572
1573 /*
1574 scaling kernel related function
1575 */
1576 static void
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578                              struct encode_state *encode_state,
1579                              struct i965_gpe_context *gpe_context,
1580                              struct intel_encoder_context *encoder_context,
1581                              void *param)
1582 {
1583     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584     struct scaling_param *surface_param = (struct scaling_param *)param;
1585
1586     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1587
1588     if (!curbe_cmd)
1589         return;
1590
1591     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1592
1593     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1594     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1595
1596     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1598
1599
1600     curbe_cmd->dw5.flatness_threshold = 128;
1601     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1604
1605     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606         curbe_cmd->dw7.enable_mb_variance_output ||
1607         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1609     }
1610
1611     i965_gpe_context_unmap_curbe(gpe_context);
1612     return;
1613 }
1614
1615 static void
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617                               struct encode_state *encode_state,
1618                               struct i965_gpe_context *gpe_context,
1619                               struct intel_encoder_context *encoder_context,
1620                               void *param)
1621 {
1622     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623     struct scaling_param *surface_param = (struct scaling_param *)param;
1624
1625     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1626
1627     if (!curbe_cmd)
1628         return;
1629
1630     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1631
1632     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1633     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1634
1635     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1637
1638     if (surface_param->enable_mb_flatness_check)
1639         curbe_cmd->dw5.flatness_threshold = 128;
1640     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1644
1645     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646         curbe_cmd->dw6.enable_mb_variance_output ||
1647         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1649     }
1650
1651     i965_gpe_context_unmap_curbe(gpe_context);
1652     return;
1653 }
1654
1655 static void
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657                              struct encode_state *encode_state,
1658                              struct i965_gpe_context *gpe_context,
1659                              struct intel_encoder_context *encoder_context,
1660                              void *param)
1661 {
1662     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663     struct scaling_param *surface_param = (struct scaling_param *)param;
1664
1665     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1666
1667     if (!curbe_cmd)
1668         return;
1669
1670     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1671
1672     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1673     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1674
1675     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1677
1678     i965_gpe_context_unmap_curbe(gpe_context);
1679     return;
1680 }
1681
1682 static void
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684                               struct encode_state *encode_state,
1685                               struct i965_gpe_context *gpe_context,
1686                               struct intel_encoder_context *encoder_context,
1687                               void *param)
1688 {
1689     struct scaling_param *surface_param = (struct scaling_param *)param;
1690     struct i965_driver_data *i965 = i965_driver_data(ctx);
1691     unsigned int surface_format;
1692     unsigned int res_size;
1693
1694     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1698     else
1699         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1700
1701     i965_add_2d_gpe_surface(ctx, gpe_context,
1702                             surface_param->input_surface,
1703                             0, 1, surface_format,
1704                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1705
1706     i965_add_2d_gpe_surface(ctx, gpe_context,
1707                             surface_param->output_surface,
1708                             0, 1, surface_format,
1709                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1710
1711     /*add buffer mv_proc_stat, here need change*/
1712     if (IS_GEN8(i965->intel.device_info)) {
1713         if (surface_param->mbv_proc_stat_enabled) {
1714             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1715
1716             i965_add_buffer_gpe_surface(ctx,
1717                                         gpe_context,
1718                                         surface_param->pres_mbv_proc_stat_buffer,
1719                                         0,
1720                                         res_size / 4,
1721                                         0,
1722                                         GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1723         }
1724         if (surface_param->enable_mb_flatness_check) {
1725             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726                                            surface_param->pres_flatness_check_surface,
1727                                            1,
1728                                            I965_SURFACEFORMAT_R8_UNORM,
1729                                            GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1730         }
1731     } else {
1732         if (surface_param->mbv_proc_stat_enabled) {
1733             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1734
1735             i965_add_buffer_gpe_surface(ctx,
1736                                         gpe_context,
1737                                         surface_param->pres_mbv_proc_stat_buffer,
1738                                         0,
1739                                         res_size / 4,
1740                                         0,
1741                                         GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742         } else if (surface_param->enable_mb_flatness_check) {
1743             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744                                            surface_param->pres_flatness_check_surface,
1745                                            1,
1746                                            I965_SURFACEFORMAT_R8_UNORM,
1747                                            GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1748         }
1749     }
1750     return;
1751 }
1752
static VAStatus
gen9_avc_kernel_scaling(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int hme_type)
{
    /*
     * Launch one stage of the HME downscaling chain (4x, 16x or 32x) as a
     * GPE media kernel.  Each stage reads the previous stage's output (the
     * raw input YUV for the 4x stage) and writes the matching scaled
     * surface held in the reconstructed surface's private data.
     * Always returns VA_STATUS_SUCCESS.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    struct i965_gpe_context *gpe_context;
    struct scaling_param surface_param;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;

    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;

    memset(&surface_param, 0, sizeof(struct scaling_param));
    /* Select kernel, walker resolution, in/out surfaces and the per-MB
     * statistics options for the requested stage. */
    switch (hme_type) {
    case INTEL_ENC_HME_4x : {
        /* 4x: raw YUV -> 4x surface; the only stage producing MB stats. */
        media_function = INTEL_MEDIA_STATE_4X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;

        surface_param.input_surface = encode_state->input_yuv_object ;
        surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
        surface_param.input_frame_height = generic_state->frame_height_in_pixel ;

        surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_4x ;
        surface_param.output_frame_height = generic_state->frame_height_4x ;

        surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
        surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
        surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 1 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 0 ;
        break;
    }
    case INTEL_ENC_HME_16x : {
        /* 16x: 4x surface -> 16x surface, same 4x kernel, stats disabled. */
        media_function = INTEL_MEDIA_STATE_16X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_4x ;
        surface_param.input_frame_height = generic_state->frame_height_4x ;

        surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_16x ;
        surface_param.output_frame_height = generic_state->frame_height_16x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 1 ;
        surface_param.use_32x_scaling = 0 ;

        break;
    }
    case INTEL_ENC_HME_32x : {
        /* 32x: 16x surface -> 32x surface, via the 2x kernel. */
        media_function = INTEL_MEDIA_STATE_32X_SCALING;
        kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
        downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;

        surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
        surface_param.input_frame_width = generic_state->frame_width_16x ;
        surface_param.input_frame_height = generic_state->frame_height_16x ;

        surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
        surface_param.output_frame_width = generic_state->frame_width_32x ;
        surface_param.output_frame_height = generic_state->frame_height_32x ;

        surface_param.enable_mb_flatness_check = 0 ;
        surface_param.enable_mb_variance_output = 0 ;
        surface_param.enable_mb_pixel_average_output = 0 ;

        surface_param.blk8x8_stat_enabled = 0 ;
        surface_param.use_4x_scaling  = 0 ;
        surface_param.use_16x_scaling = 0 ;
        surface_param.use_32x_scaling = 1 ;
        break;
    }
    /* NOTE(review): the default case leaves downscaled_*_in_mb
     * uninitialized and assert() compiles out under NDEBUG — confirm
     * hme_type is always one of the three values at every call site. */
    default :
        assert(0);

    }

    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    /* 32x runs the 2x kernel (on the 16x output); everything else the 4x. */
    if (surface_param.use_32x_scaling) {
        generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    } else {
        generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
    }

    if (surface_param.use_32x_scaling) {
        surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
    } else {
        surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
        surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
    }

    /* NOTE(review): these 4x-only overrides happen after the CURBE was
     * written above, so they affect only the surface binding below; the
     * CURBE saw the flags assigned in the switch — looks intentional,
     * confirm against the kernel interface. */
    if (surface_param.use_4x_scaling) {
        if (avc_state->mb_status_supported) {
            surface_param.enable_mb_flatness_check = 0;
            surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
            surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);

        } else {
            surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
            surface_param.mbv_proc_stat_enabled = 0 ;
            surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
        }
    }

    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* setup the interface data */
    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    if (surface_param.use_32x_scaling) {
        kernel_walker_param.resolution_x = downscaled_width_in_mb ;
        kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    } else {
        /* the scaling is based on 8x8 blk level */
        kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
        kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    }
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
1917
/*
 * Frame- and MB-level BRC (bit-rate control) related functions.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    /*
     * Fill a Gen9 MFX_AVC_IMG_STATE command with the per-frame PAK image
     * parameters derived from the current sequence/picture parameters.
     * Per-pass fields (macroblock_stat_enable, non_first_pass_flag, ...)
     * get first-pass defaults here and are patched by the callers that
     * emit one command per PAK pass.
     */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header (type/pipeline/opcode and payload length). */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1-DW2: frame geometry in macroblock units. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: picture-level prediction/QP-offset flags. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: coding-tool flags mirrored from SPS/PPS (progressive frames
     * only: field_picture_flag stays 0). */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: multi-pass / size-conformance control; AQ (trellis) rounding is
     * only enabled together with CABAC (entropy_coding_flag). */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB size ceilings in bits. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8-DW9: per-pass slice delta-QP bounds (all zero = unused). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10-DW11: frame bitrate min/max window (max set to the field's
     * largest encodable value, i.e. effectively unconstrained). */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
2011
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    /*
     * Gen8 variant of the MFX_AVC_IMG_STATE initializer.  Identical in
     * intent to the Gen9 version but uses the Gen8 command layout (DW3
     * carries inter/intra MB conformance flags instead of the BRC-domain
     * bit, and there is no DW12 VAD field).
     */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header (type/pipeline/opcode and payload length). */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1-DW2: frame geometry in macroblock units. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: picture-level prediction/QP-offset flags; conformance flags are
     * off on the first pass and enabled for re-encode passes by the caller. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: coding-tool flags mirrored from SPS/PPS (progressive frames
     * only: field_picture_flag stays 0). */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: multi-pass / size-conformance control; AQ (trellis) rounding is
     * only enabled together with CABAC (entropy_coding_flag). */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB size ceilings in bits. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8-DW9: per-pass slice delta-QP bounds (all zero = unused). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10-DW11: frame bitrate min/max window (max set to the field's
     * largest encodable value, i.e. effectively unconstrained). */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set paramters DW19/DW20 for slices */
}
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102                               struct encode_state *encode_state,
2103                               struct intel_encoder_context *encoder_context,
2104                               struct i965_gpe_resource *gpe_resource)
2105 {
2106     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2108     char *pdata;
2109     int i;
2110     unsigned int * data;
2111     struct gen9_mfx_avc_img_state cmd;
2112
2113     pdata = i965_map_gpe_resource(gpe_resource);
2114
2115     if (!pdata)
2116         return;
2117
2118     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119     for (i = 0; i < generic_state->num_pak_passes; i++) {
2120
2121         if (i == 0) {
2122             cmd.dw4.macroblock_stat_enable = 0;
2123             cmd.dw5.non_first_pass_flag = 0;
2124         } else {
2125             cmd.dw4.macroblock_stat_enable = 1;
2126             cmd.dw5.non_first_pass_flag = 1;
2127             cmd.dw5.intra_mb_ipcm_flag = 1;
2128
2129         }
2130         cmd.dw5.mb_rate_ctrl_flag = 0;
2131         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133         *data = MI_BATCH_BUFFER_END;
2134
2135         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2136     }
2137     i965_unmap_gpe_resource(gpe_resource);
2138     return;
2139 }
2140
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142                               struct encode_state *encode_state,
2143                               struct intel_encoder_context *encoder_context,
2144                               struct i965_gpe_resource *gpe_resource)
2145 {
2146     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2148     char *pdata;
2149     int i;
2150     unsigned int * data;
2151     struct gen8_mfx_avc_img_state cmd;
2152
2153     pdata = i965_map_gpe_resource(gpe_resource);
2154
2155     if (!pdata)
2156         return;
2157
2158     gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159     for (i = 0; i < generic_state->num_pak_passes; i++) {
2160
2161         if (i == 0) {
2162             cmd.dw4.macroblock_stat_enable = 0;
2163             cmd.dw5.non_first_pass_flag = 0;
2164         } else {
2165             cmd.dw4.macroblock_stat_enable = 1;
2166             cmd.dw5.non_first_pass_flag = 1;
2167             cmd.dw5.intra_mb_ipcm_flag = 1;
2168             cmd.dw3.inter_mb_conf_flag = 1;
2169             cmd.dw3.intra_mb_conf_flag = 1;
2170         }
2171         cmd.dw5.mb_rate_ctrl_flag = 0;
2172         memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173         data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174         *data = MI_BATCH_BUFFER_END;
2175
2176         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2177     }
2178     i965_unmap_gpe_resource(gpe_resource);
2179     return;
2180 }
2181
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    /*
     * Non-BRC variant: write a single MFX_AVC_IMG_STATE command (for the
     * current PAK pass only) followed by MI_BATCH_BUFFER_END into
     * gpe_resource.
     */
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;

    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);

    if (generic_state->curr_pak_pass == 0) {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;

    } else {
        cmd.dw4.macroblock_stat_enable = 1;
        /* NOTE(review): non_first_pass_flag stays 0 on re-encode passes
         * here, while the BRC variant (gen9_avc_set_image_state) sets it
         * to 1 — confirm this asymmetry is intentional. */
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
    /* Terminate the command buffer right after the image state. */
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
    return;
}
2219
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    /*
     * Build the Gen9.5 trellis-quantization lambda table for the current
     * frame type in avc_state->lamda_value_lut.  The table holds AVC_QP_MAX
     * rows of two 32-bit entries; each entry packs the intra lambda in the
     * high 16 bits and the inter lambda in the low 16 bits.  Sentinel
     * values in the static tables are replaced with 0xf000 + a rounding
     * value (fixed for intra, preset/state dependent for inter).
     */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* Table size in bytes: AVC_QP_MAX rows x 2 columns of 32-bit words. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    /* Start from the static per-frame-type lambda table. */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    /* Patch sentinel entries with the effective rounding values. */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;

            /* Intra: only the 0xfffa sentinel is rewritten. */
            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                /* The 0xffef sentinel selects the inter rounding value from
                 * explicit state or the preset defaults, per frame type. */
                if (inter == 0xffef) {
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): unlike the intra path, this rewrite runs for
                 * every inter < GEN95_AVC_MAX_LAMBDA, not only the 0xffef
                 * sentinel (reusing the last computed rounding_value) —
                 * confirm against the reference lambda-table algorithm. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
2297
/*
 * Fill the BRC constant-data surface (res_brc_const_data_buffer) for gen9+.
 *
 * The buffer is a packed sequence of fixed-size regions consumed by the BRC
 * kernels; the regions are written in order and `data`/`size` walk the layout:
 *   - QP adjustment / distortion threshold / max-frame threshold /
 *     distortion-QP adjustment tables (one blob, I/P/B)
 *   - 128 B skip-threshold table (P/B only; I frames leave the zeroed region)
 *   - 256 B reference-list QP region (32 B L0 + 32 B pad + 32 B L1 + 160 B)
 *   - 1664 B MV cost / mode cost table (32 B per QP, AVC_QP_MAX entries)
 *   - 128 B ref cost table
 *   - 64 B intra scaling factors
 *   - (KBL/GLK/GEN10 only) 512 B lambda table + 64 B FTQ25 table
 * The region sizes must match what the shipped BRC kernel binaries expect;
 * do not reorder or resize without checking the kernel side.
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;        /* write cursor into the mapped buffer */
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;              /* size in bytes of the current region */
    unsigned int table_idx = 0;         /* 0/1/2 for I/P/B kernel tables */
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Zero first so regions that are conditionally skipped stay defined. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optionally override the high byte of each 2-byte skip entry with the
     * user-supplied non-FTQ skip threshold LUT. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 B L0 indices (0xff = unused), 32 B pad, 32 B L1 indices
     * (0xff = unused), 160 B reserved. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* L1 list lives 64 bytes into the region. */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//?
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* Patch byte 3 of the first 16-byte half of each 32-byte QP entry.
         * NOTE(review): data_tmp is unsigned char *, so the (unsigned int)
         * cast is truncated to a single byte on store — presumably intended,
         * but the cast is misleading; confirm against the kernel layout. */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* User-supplied FTQ skip thresholds fan out to bytes 24-25 and 27-31 of
     * each 32-byte mode-cost entry. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5-class platforms append lambda and FTQ25 tables. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2455
/*
 * Legacy variant of gen9_avc_init_brc_const_data using the gen75 tables.
 *
 * Same overall region sequence (QP adjustment blob, 128 B skip thresholds,
 * 256 B ref-list region left zeroed, 1664 B MV/mode cost, 128 B ref cost),
 * but without the scaling-factor or gen95 tables.
 *
 * NOTE(review): `data` is unsigned int * here (the newer variant uses
 * unsigned char *), yet `size` values are byte counts — `data += size`
 * therefore advances by size * 4 bytes, and the non-FTQ/FTQ LUT stores hit
 * 4-byte slots instead of single bytes. Verify against the gen75 BRC
 * kernel's constant-buffer layout before changing anything here.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;         /* write cursor (see NOTE above) */
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;              /* region size in bytes */
    unsigned int table_idx = 0;         /* 0/1/2 for I/P/B kernel tables */
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Zero first so skipped regions (ref-list QPs, I-frame skip table) stay defined. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optional user override of the skip thresholds (P/B only). */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Ref-list QP region is left zeroed on this path; just skip over it. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* Patch the old intra mode cost into each per-QP entry. */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* User-supplied FTQ skip thresholds fan out into each per-QP entry. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2550 static void
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2555                                   void * param)
2556 {
2557     gen9_avc_brc_init_reset_curbe_data *cmd;
2558     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561     double input_bits_per_frame = 0;
2562     double bps_ratio = 0;
2563     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564     struct avc_param common_param;
2565
2566     cmd = i965_gpe_context_map_curbe(gpe_context);
2567
2568     if (!cmd)
2569         return;
2570
2571     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2572
2573     memset(&common_param, 0, sizeof(common_param));
2574     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578     common_param.frames_per_100s = generic_state->frames_per_100s;
2579     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580     common_param.target_bit_rate = generic_state->target_bit_rate;
2581
2582     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2585     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2587     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591     cmd->dw12.no_slices = avc_state->slice_num;
2592
2593     //VUI
2594     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2596         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2598
2599         }
2600
2601     }
2602     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2603     cmd->dw7.frame_rate_d = 100;
2604     cmd->dw8.brc_flag = 0;
2605     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2606
2607
2608     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2609         //CBR
2610         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2612
2613     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2614         //VBR
2615         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2617         }
2618         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2619
2620     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2621         //AVBR
2622         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2624
2625     }
2626     //igonre icq/vcm/qvbr
2627
2628     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2630
2631     //frame bits
2632     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2633
2634     if (cmd->dw2.buf_size_in_bits == 0) {
2635         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2636     }
2637
2638     if (cmd->dw1.init_buf_full_in_bits == 0) {
2639         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2640     }
2641     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2643     }
2644     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2646     }
2647
2648     //AVBR
2649     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2651         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2652
2653     }
2654
2655     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2657
2658
2659     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2664     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2666     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2667     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2680     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2681     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2683
2684     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2685
2686     i965_gpe_context_unmap_curbe(gpe_context);
2687
2688     return;
2689 }
2690
2691 static void
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693                                      struct encode_state *encode_state,
2694                                      struct i965_gpe_context *gpe_context,
2695                                      struct intel_encoder_context *encoder_context,
2696                                      void * param_mbenc)
2697 {
2698     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2700
2701     i965_add_buffer_gpe_surface(ctx,
2702                                 gpe_context,
2703                                 &avc_ctx->res_brc_history_buffer,
2704                                 0,
2705                                 avc_ctx->res_brc_history_buffer.size,
2706                                 0,
2707                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2708
2709     i965_add_buffer_2d_gpe_surface(ctx,
2710                                    gpe_context,
2711                                    &avc_ctx->res_brc_dist_data_surface,
2712                                    1,
2713                                    I965_SURFACEFORMAT_R8_UNORM,
2714                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2715
2716     return;
2717 }
2718
2719 static VAStatus
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721                                struct encode_state *encode_state,
2722                                struct intel_encoder_context *encoder_context)
2723 {
2724     struct i965_driver_data *i965 = i965_driver_data(ctx);
2725     struct i965_gpe_table *gpe = &i965->gpe_table;
2726     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2730
2731     struct i965_gpe_context *gpe_context;
2732     struct gpe_media_object_parameter media_object_param;
2733     struct gpe_media_object_inline_data media_object_inline_data;
2734     int media_function = 0;
2735     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2736
2737     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2738
2739     if (generic_state->brc_inited)
2740         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2741
2742     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2743
2744     gpe->context_init(ctx, gpe_context);
2745     gpe->reset_binding_table(ctx, gpe_context);
2746
2747     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2748
2749     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2750
2751     gpe->setup_interface_data(ctx, gpe_context);
2752
2753     memset(&media_object_param, 0, sizeof(media_object_param));
2754     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755     media_object_param.pinline_data = &media_object_inline_data;
2756     media_object_param.inline_size = sizeof(media_object_inline_data);
2757
2758     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2759                                      gpe_context,
2760                                      media_function,
2761                                      &media_object_param);
2762
2763     return VA_STATUS_SUCCESS;
2764 }
2765
/*
 * Program the curbe for the per-frame BRC update kernel.
 *
 * Besides filling the curbe from the generic/AVC encoder state, this also
 * advances generic_state->brc_init_current_target_buf_full_in_bits (the
 * running VBV target fullness) — it is a stateful update, not a pure setter.
 * Bails out silently if the reconstructed surface or the curbe cannot be
 * obtained.
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Start from the static default curbe and patch the dynamic fields. */
    memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));

    /* Wrap the running target fullness when it exceeds the VBV buffer. */
    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Account for skipped frames in the target fullness. */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header bytes already emitted, converted to bits. */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-frame-type QP clamps; 0/0 means "no clamping" to the kernel. */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Advance the running target fullness by one frame's worth of bits. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR: growth-adjustment frames and rate-ratio thresholds scaled by
     * the configured convergence/accuracy. */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    /* Level-derived upper bound on frame size. */
    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2875
2876 static void
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878                                        struct encode_state *encode_state,
2879                                        struct i965_gpe_context *gpe_context,
2880                                        struct intel_encoder_context *encoder_context,
2881                                        void * param_brc)
2882 {
2883     struct i965_driver_data *i965 = i965_driver_data(ctx);
2884     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886     struct brc_param * param = (struct brc_param *)param_brc ;
2887     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2888     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889     unsigned char is_g95 = 0;
2890
2891     if (IS_SKL(i965->intel.device_info) ||
2892         IS_BXT(i965->intel.device_info) ||
2893         IS_GEN8(i965->intel.device_info))
2894         is_g95 = 0;
2895     else if (IS_KBL(i965->intel.device_info) ||
2896              IS_GEN10(i965->intel.device_info) ||
2897              IS_GLK(i965->intel.device_info))
2898         is_g95 = 1;
2899
2900     /* brc history buffer*/
2901     i965_add_buffer_gpe_surface(ctx,
2902                                 gpe_context,
2903                                 &avc_ctx->res_brc_history_buffer,
2904                                 0,
2905                                 avc_ctx->res_brc_history_buffer.size,
2906                                 0,
2907                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2908
2909     /* previous pak buffer*/
2910     i965_add_buffer_gpe_surface(ctx,
2911                                 gpe_context,
2912                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2913                                 0,
2914                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2915                                 0,
2916                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2917
2918     /* image state command buffer read only*/
2919     i965_add_buffer_gpe_surface(ctx,
2920                                 gpe_context,
2921                                 &avc_ctx->res_brc_image_state_read_buffer,
2922                                 0,
2923                                 avc_ctx->res_brc_image_state_read_buffer.size,
2924                                 0,
2925                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2926
2927     /* image state command buffer write only*/
2928     i965_add_buffer_gpe_surface(ctx,
2929                                 gpe_context,
2930                                 &avc_ctx->res_brc_image_state_write_buffer,
2931                                 0,
2932                                 avc_ctx->res_brc_image_state_write_buffer.size,
2933                                 0,
2934                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2935
2936     if (avc_state->mbenc_brc_buffer_size > 0) {
2937         i965_add_buffer_gpe_surface(ctx,
2938                                     gpe_context,
2939                                     &(avc_ctx->res_mbenc_brc_buffer),
2940                                     0,
2941                                     avc_ctx->res_mbenc_brc_buffer.size,
2942                                     0,
2943                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2944     } else {
2945         /*  Mbenc curbe input buffer */
2946         gen9_add_dri_buffer_gpe_surface(ctx,
2947                                         gpe_context,
2948                                         gpe_context_mbenc->dynamic_state.bo,
2949                                         0,
2950                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2951                                         gpe_context_mbenc->curbe.offset,
2952                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953         /* Mbenc curbe output buffer */
2954         gen9_add_dri_buffer_gpe_surface(ctx,
2955                                         gpe_context,
2956                                         gpe_context_mbenc->dynamic_state.bo,
2957                                         0,
2958                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2959                                         gpe_context_mbenc->curbe.offset,
2960                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2961     }
2962
2963     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964     i965_add_buffer_2d_gpe_surface(ctx,
2965                                    gpe_context,
2966                                    &avc_ctx->res_brc_dist_data_surface,
2967                                    1,
2968                                    I965_SURFACEFORMAT_R8_UNORM,
2969                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2970
2971     /* BRC const data 2D surface buffer */
2972     i965_add_buffer_2d_gpe_surface(ctx,
2973                                    gpe_context,
2974                                    &avc_ctx->res_brc_const_data_buffer,
2975                                    1,
2976                                    I965_SURFACEFORMAT_R8_UNORM,
2977                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2978
2979     /* MB statistical data surface*/
2980     if (!IS_GEN8(i965->intel.device_info)) {
2981         i965_add_buffer_gpe_surface(ctx,
2982                                     gpe_context,
2983                                     &avc_ctx->res_mb_status_buffer,
2984                                     0,
2985                                     avc_ctx->res_mb_status_buffer.size,
2986                                     0,
2987                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2988     } else {
2989         i965_add_buffer_2d_gpe_surface(ctx,
2990                                        gpe_context,
2991                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2992                                        1,
2993                                        I965_SURFACEFORMAT_R8_UNORM,
2994                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2995     }
2996     return;
2997 }
2998
/*
 * Run the frame-level BRC (bit-rate control) update kernel.
 *
 * The BRC frame-update kernel consumes the previous PAK statistics and
 * rewrites both the MbEnc curbe and the PAK image-state command buffer,
 * so this function first locates and programs the MbEnc GPE context for
 * the current frame type, then dispatches the BRC frame-update kernel as
 * a single media object.
 *
 * Always returns VA_STATUS_SUCCESS; kernel submission itself is
 * fire-and-forget at this level.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    /* BRC is reported as disabled to the MbEnc curbe here; the BRC kernel
     * itself patches the MbEnc curbe afterwards. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* NOTE: the trailing "&& (0)" deliberately forces dirty-ROI off, even
     * for P frames with dirty ROI regions configured. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    /* Any of these features requires the MB constant-data surface. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    /* Any of these features requires the per-MB QP surface. */
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Pick the MbEnc kernel variant (normal/performance/quality), I-frame
     * flavor first; the frame type offset is added below. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    /* Kernel tables are laid out I, P, B consecutively. */
    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    /* From here on gpe_context refers to the BRC frame-update kernel, not
     * MbEnc; the MbEnc context is still reachable via curbe_brc_param. */
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct: fill the read-only image-state buffer the BRC
     * kernel takes as input (gen8 layout differs from gen9+). */
    if (IS_GEN8(i965->intel.device_info)) {
        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    } else {
        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    }
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* The BRC frame-update kernel runs as a single media object with
     * zeroed inline data. */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
3123
3124 static void
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126                                  struct encode_state *encode_state,
3127                                  struct i965_gpe_context *gpe_context,
3128                                  struct intel_encoder_context *encoder_context,
3129                                  void * param)
3130 {
3131     gen9_avc_mb_brc_curbe_data *cmd;
3132     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3134
3135     cmd = i965_gpe_context_map_curbe(gpe_context);
3136
3137     if (!cmd)
3138         return;
3139
3140     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3141
3142     cmd->dw0.cur_frame_type = generic_state->frame_type;
3143     if (generic_state->brc_roi_enable) {
3144         cmd->dw0.enable_roi = 1;
3145     } else {
3146         cmd->dw0.enable_roi = 0;
3147     }
3148
3149     i965_gpe_context_unmap_curbe(gpe_context);
3150
3151     return;
3152 }
3153
3154 static void
3155 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3156                                     struct encode_state *encode_state,
3157                                     struct i965_gpe_context *gpe_context,
3158                                     struct intel_encoder_context *encoder_context,
3159                                     void * param_mbenc)
3160 {
3161     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3162     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3163     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3164
3165     /* brc history buffer*/
3166     i965_add_buffer_gpe_surface(ctx,
3167                                 gpe_context,
3168                                 &avc_ctx->res_brc_history_buffer,
3169                                 0,
3170                                 avc_ctx->res_brc_history_buffer.size,
3171                                 0,
3172                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3173
3174     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
3175     if (generic_state->mb_brc_enabled) {
3176         i965_add_buffer_2d_gpe_surface(ctx,
3177                                        gpe_context,
3178                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
3179                                        1,
3180                                        I965_SURFACEFORMAT_R8_UNORM,
3181                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3182
3183     }
3184
3185     /* BRC roi feature*/
3186     if (generic_state->brc_roi_enable) {
3187         i965_add_buffer_gpe_surface(ctx,
3188                                     gpe_context,
3189                                     &avc_ctx->res_mbbrc_roi_surface,
3190                                     0,
3191                                     avc_ctx->res_mbbrc_roi_surface.size,
3192                                     0,
3193                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3194
3195     }
3196
3197     /* MB statistical data surface*/
3198     i965_add_buffer_gpe_surface(ctx,
3199                                 gpe_context,
3200                                 &avc_ctx->res_mb_status_buffer,
3201                                 0,
3202                                 avc_ctx->res_mb_status_buffer.size,
3203                                 0,
3204                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
3205
3206     return;
3207 }
3208
3209 static VAStatus
3210 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3211                               struct encode_state *encode_state,
3212                               struct intel_encoder_context *encoder_context)
3213
3214 {
3215     struct i965_driver_data *i965 = i965_driver_data(ctx);
3216     struct i965_gpe_table *gpe = &i965->gpe_table;
3217     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3218     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3219     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3220     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3221
3222     struct i965_gpe_context *gpe_context;
3223     struct gpe_media_object_walker_parameter media_object_walker_param;
3224     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3225     int media_function = 0;
3226     int kernel_idx = 0;
3227
3228     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3229     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3230     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3231
3232     gpe->context_init(ctx, gpe_context);
3233     gpe->reset_binding_table(ctx, gpe_context);
3234
3235     /* set curbe brc mb update*/
3236     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3237
3238
3239     /* set surface brc mb update*/
3240     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3241
3242
3243     gpe->setup_interface_data(ctx, gpe_context);
3244
3245     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3246     /* the scaling is based on 8x8 blk level */
3247     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3248     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3249     kernel_walker_param.no_dependency = 1;
3250
3251     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3252
3253     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3254                                             gpe_context,
3255                                             media_function,
3256                                             &media_object_walker_param);
3257
3258     return VA_STATUS_SUCCESS;
3259 }
3260
3261 /*
3262 mbenc kernel related function,it include intra dist kernel
3263 */
3264 static int
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3266 {
3267     int biweight = 32;      // default value
3268
3269     /* based on kernel HLD*/
3270     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3271         biweight = 32;
3272     } else {
3273         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3274
3275         if (biweight != 16 && biweight != 21 &&
3276             biweight != 32 && biweight != 43 && biweight != 48) {
3277             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
3278         }
3279     }
3280
3281     return biweight;
3282 }
3283
3284 static void
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286                                struct encode_state *encode_state,
3287                                struct intel_encoder_context *encoder_context)
3288 {
3289     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3293
3294     int max_num_references;
3295     VAPictureH264 *curr_pic;
3296     VAPictureH264 *ref_pic_l0;
3297     VAPictureH264 *ref_pic_l1;
3298     int i = 0;
3299     int tb = 0;
3300     int td = 0;
3301     int tx = 0;
3302     int tmp = 0;
3303     int poc0 = 0;
3304     int poc1 = 0;
3305
3306     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3307
3308     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309     curr_pic = &pic_param->CurrPic;
3310     for (i = 0; i < max_num_references; i++) {
3311         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3312
3313         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3315             break;
3316         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3317         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3318             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3319             break;
3320
3321         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323         CLIP(poc0, -128, 127);
3324         CLIP(poc1, -128, 127);
3325         tb = poc0;
3326         td = poc1;
3327
3328         if (td == 0) {
3329             td = 1;
3330         }
3331         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332         tx = (16384 + tmp) / td ;
3333         tmp = (tb * tx + 32) >> 6;
3334         CLIP(tmp, -1024, 1023);
3335         avc_state->dist_scale_factor_list0[i] = tmp;
3336     }
3337     return;
3338 }
3339
3340 static unsigned int
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342                               VAEncSliceParameterBufferH264 *slice_param,
3343                               int list,
3344                               int ref_frame_idx)
3345 {
3346     struct i965_driver_data *i965 = i965_driver_data(ctx);
3347     struct object_surface *obj_surface;
3348     struct gen9_surface_avc *avc_priv_surface;
3349     VASurfaceID surface_id;
3350
3351     assert(slice_param);
3352     assert(list < 2);
3353
3354     if (list == 0) {
3355         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3357         else
3358             return 0;
3359     } else {
3360         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3362         else
3363             return 0;
3364     }
3365     obj_surface = SURFACE(surface_id);
3366     if (obj_surface && obj_surface->private_data) {
3367         avc_priv_surface = obj_surface->private_data;
3368         return avc_priv_surface->qp_value;
3369     } else {
3370         return 0;
3371     }
3372 }
3373
/*
 * Populate the MB-BRC constant-data buffer for the current frame type.
 *
 * The buffer holds one 16-dword row per QP level (size = 16 * 52 dwords).
 * A per-frame-type base table is copied in first, then individual dword
 * slots in each row are patched according to the enabled encoder
 * features:
 *   dword 3  - P-skip MV cost (P frames with skip bias adjustment)
 *   dword 6/7- FTQ skip thresholds (when app supplies the LUT)
 *   dword 9  - non-FTQ skip value (LUT input, or per-type default table)
 *   dword 10 - intra scaling factor (adaptive or static)
 *   dword 11/12 - trellis lambda values (when kernel trellis is on)
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;   /* 16 dwords per QP entry, 52 QP levels */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Base table selected by slice type (I/P/B mapped by slice_type_kernel). */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    /* Remember the start of the buffer; the patch loops below advance
     * `data` by one 16-dword row per QP. */
    data_tmp = data;

    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 0: legacy intra mode cost, when old-mode-cost is on. */
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 3: skip-bias-adjusted P-skip MV cost (P only). */
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: non-FTQ skip threshold, either from the
             * app-supplied LUT or the per-type default table. */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra scaling factor (adaptive vs. static). */
            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass over all QP rows for slice-type-independent patches. */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        /* dwords 6/7: FTQ skip thresholds replicated into byte lanes. */
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        /* dwords 11/12: trellis lambda values for this QP. */
        if (avc_state->kernel_trellis_enable) {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3463
3464 static void
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466                          struct encode_state *encode_state,
3467                          struct i965_gpe_context *gpe_context,
3468                          struct intel_encoder_context *encoder_context,
3469                          void * param)
3470 {
3471     struct i965_driver_data *i965 = i965_driver_data(ctx);
3472     union {
3473         gen9_avc_mbenc_curbe_data *g9;
3474         gen95_avc_mbenc_curbe_data *g95;
3475     } cmd;
3476     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3479
3480     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3482     VASurfaceID surface_id;
3483     struct object_surface *obj_surface;
3484
3485     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486     unsigned char qp = 0;
3487     unsigned char me_method = 0;
3488     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489     unsigned int table_idx = 0;
3490     unsigned char is_g9 = 0;
3491     unsigned char is_g95 = 0;
3492     unsigned int curbe_size = 0;
3493
3494     unsigned int preset = generic_state->preset;
3495     if (IS_SKL(i965->intel.device_info) ||
3496         IS_BXT(i965->intel.device_info)) {
3497         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3498         if (!cmd.g9)
3499             return;
3500         is_g9 = 1;
3501         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502         memset(cmd.g9, 0, curbe_size);
3503
3504         if (mbenc_i_frame_dist_in_use) {
3505             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3506
3507         } else {
3508             switch (generic_state->frame_type) {
3509             case SLICE_TYPE_I:
3510                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3511                 break;
3512             case SLICE_TYPE_P:
3513                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3514                 break;
3515             case SLICE_TYPE_B:
3516                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3517                 break;
3518             default:
3519                 assert(0);
3520             }
3521
3522         }
3523     } else if (IS_KBL(i965->intel.device_info) ||
3524                IS_GEN10(i965->intel.device_info) ||
3525                IS_GLK(i965->intel.device_info)) {
3526         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3527         if (!cmd.g95)
3528             return;
3529         is_g95 = 1;
3530         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531         memset(cmd.g9, 0, curbe_size);
3532
3533         if (mbenc_i_frame_dist_in_use) {
3534             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3535
3536         } else {
3537             switch (generic_state->frame_type) {
3538             case SLICE_TYPE_I:
3539                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3540                 break;
3541             case SLICE_TYPE_P:
3542                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3543                 break;
3544             case SLICE_TYPE_B:
3545                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3546                 break;
3547             default:
3548                 assert(0);
3549             }
3550
3551         }
3552     } else {
3553         /* Never get here, just silence a gcc warning */
3554         assert(0);
3555
3556         return;
3557     }
3558
3559     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3561
3562     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3566
3567     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568     cmd.g9->dw38.max_len_sp = 0;
3569
3570     if (is_g95)
3571         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3572
3573     cmd.g9->dw3.src_access = 0;
3574     cmd.g9->dw3.ref_access = 0;
3575
3576     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577         //disable ftq_override by now.
3578         if (avc_state->ftq_override) {
3579             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3580
3581         } else {
3582             // both gen9 and gen95 come here by now
3583             if (generic_state->frame_type == SLICE_TYPE_P) {
3584                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3585
3586             } else {
3587                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3588             }
3589         }
3590     } else {
3591         cmd.g9->dw3.ftq_enable = 0;
3592     }
3593
3594     if (avc_state->disable_sub_mb_partion)
3595         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3596
3597     if (mbenc_i_frame_dist_in_use) {
3598         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601         cmd.g9->dw6.batch_buffer_end = 0;
3602         cmd.g9->dw31.intra_compute_type = 1;
3603
3604     } else {
3605         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3608
3609         {
3610             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612             } else if (avc_state->skip_bias_adjustment_enable) {
3613                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614                 // No need to check for P picture as the flag is only enabled for P picture */
3615                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3616
3617             }
3618         }
3619
3620         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3622     }
3623     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3631
3632
3633     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634     cmd.g9->dw7.src_field_polarity = 0;//field related
3635
3636     /* ftq_skip_threshold_lut setting, dw14/dw15 */
3637
3638     /*r5 disable NonFTQSkipThresholdLUT*/
3639     if (generic_state->frame_type == SLICE_TYPE_P) {
3640         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3641
3642     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3644
3645     }
3646
3647     cmd.g9->dw13.qp_prime_y = qp;
3648     cmd.g9->dw13.qp_prime_cb = qp;
3649     cmd.g9->dw13.qp_prime_cr = qp;
3650     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3651
3652     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653         switch (gen9_avc_multi_pred[preset]) {
3654         case 0:
3655             cmd.g9->dw32.mult_pred_l0_disable = 128;
3656             cmd.g9->dw32.mult_pred_l1_disable = 128;
3657             break;
3658         case 1:
3659             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660             cmd.g9->dw32.mult_pred_l1_disable = 128;
3661             break;
3662         case 2:
3663             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3665             break;
3666         case 3:
3667             cmd.g9->dw32.mult_pred_l0_disable = 1;
3668             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3669             break;
3670
3671         }
3672
3673     } else {
3674         cmd.g9->dw32.mult_pred_l0_disable = 128;
3675         cmd.g9->dw32.mult_pred_l1_disable = 128;
3676     }
3677
3678     /*field setting for dw33 34, ignored*/
3679
3680     if (avc_state->adaptive_transform_decision_enable) {
3681         if (generic_state->frame_type != SLICE_TYPE_I) {
3682             if (is_g9) {
3683                 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684                 cmd.g9->dw58.mb_texture_threshold = 1024;
3685                 cmd.g9->dw58.tx_decision_threshold = 128;
3686             } else if (is_g95) {
3687                 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688                 cmd.g9->dw58.mb_texture_threshold = 1024;
3689                 cmd.g9->dw58.tx_decision_threshold = 128;
3690             }
3691         }
3692     }
3693
3694
3695     if (generic_state->frame_type == SLICE_TYPE_B) {
3696         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3699     }
3700
3701     cmd.g9->dw34.b_original_bff = 0; //frame only
3702     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3707     if (is_g95) {
3708         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3710     }
3711
3712     if (is_g9) {
3713         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3714
3715         if (cmd.g9->dw34.force_non_skip_check) {
3716             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3717         }
3718     }
3719
3720
3721     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722     cmd.g9->dw38.ref_threshold = 400;
3723     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3724
3725     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3726        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3729
3730     if (mbenc_i_frame_dist_in_use) {
3731         cmd.g9->dw13.qp_prime_y = 0;
3732         cmd.g9->dw13.qp_prime_cb = 0;
3733         cmd.g9->dw13.qp_prime_cr = 0;
3734         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3737
3738     }
3739     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3750     }
3751
3752     table_idx = slice_type_kernel[generic_state->frame_type];
3753     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3754
3755     if (generic_state->frame_type == SLICE_TYPE_I) {
3756         cmd.g9->dw0.skip_mode_enable = 0;
3757         cmd.g9->dw37.skip_mode_enable = 0;
3758         cmd.g9->dw36.hme_combine_overlap = 0;
3759         cmd.g9->dw47.intra_cost_sf = 16;
3760         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3761         if (is_g9)
3762             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3763
3764     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766         cmd.g9->dw3.bme_disable_fbr = 1;
3767         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769         cmd.g9->dw7.non_skip_zmv_added = 1;
3770         cmd.g9->dw7.non_skip_mode_added = 1;
3771         cmd.g9->dw7.skip_center_mask = 1;
3772         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774         cmd.g9->dw36.hme_combine_overlap = 1;
3775         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3782
3783     } else {
3784         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786         cmd.g9->dw3.search_ctrl = 7;
3787         cmd.g9->dw3.skip_type = 1;
3788         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790         cmd.g9->dw7.skip_center_mask = 0xff;
3791         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793         cmd.g9->dw36.hme_combine_overlap = 1;
3794         surface_id = slice_param->RefPicList1[0].picture_id;
3795         obj_surface = SURFACE(surface_id);
3796         if (!obj_surface) {
3797             WARN_ONCE("Invalid backward reference frame\n");
3798             return;
3799         }
3800         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3801
3802         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3814
3815         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817             cmd.g9->dw7.non_skip_zmv_added = 1;
3818             cmd.g9->dw7.non_skip_mode_added = 1;
3819         }
3820
3821         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3824
3825     }
3826
3827     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3828
3829     if (avc_state->rolling_intra_refresh_enable) {
3830         /*by now disable it*/
3831         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832         cmd.g9->dw32.mult_pred_l0_disable = 128;
3833         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834          across one P frame to another P frame, as needed by the RollingI algo */
3835         if (is_g9) {
3836             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3839         }
3840
3841         if (is_g95) {
3842             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843                 cmd.g95->dw4.enable_intra_refresh = 0;
3844                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3847             } else {
3848                 cmd.g95->dw4.enable_intra_refresh = 1;
3849                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3854             }
3855         }
3856
3857     } else {
3858         cmd.g9->dw34.widi_intra_refresh_en = 0;
3859     }
3860
3861     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3862     if (is_g9)
3863         cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3864     else if (is_g95)
3865         cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3866
3867     /* ROI settings (dw49-56); applied only when ROI is enabled */
3868     if (curbe_param->roi_enabled) {
3869         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3870         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3871         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3872         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3873
3874         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3875         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3876         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3877         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3878
3879         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3880         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3881         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3882         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3883
3884         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3885         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3886         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3887         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3888
3889         if (!generic_state->brc_enabled) {
3890             char tmp = 0;
3891             tmp = generic_state->roi[0].value;
3892             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894             tmp = generic_state->roi[1].value;
3895             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897             tmp = generic_state->roi[2].value;
3898             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900             tmp = generic_state->roi[3].value;
3901             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3903         } else {
3904             cmd.g9->dw34.roi_enable_flag = 0;
3905         }
3906     }
3907
3908     if (is_g95) {
3909         if (avc_state->tq_enable) {
3910             if (generic_state->frame_type == SLICE_TYPE_I) {
3911                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3913
3914             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3917
3918             } else {
3919                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3921             }
3922
3923             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3925
3926             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3928
3929             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3931
3932             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3934         }
3935     }
3936
3937     if (is_g95) {
3938         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3960     }
3961
3962     if (is_g9) {
3963         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3985     }
3986
3987     i965_gpe_context_unmap_curbe(gpe_context);
3988
3989     return;
3990 }
3991
3992 static void
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994                              struct encode_state *encode_state,
3995                              struct i965_gpe_context *gpe_context,
3996                              struct intel_encoder_context *encoder_context,
3997                              void * param)
3998 {
3999     struct i965_driver_data *i965 = i965_driver_data(ctx);
4000     gen9_avc_fei_mbenc_curbe_data *cmd;
4001     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004     VASurfaceID surface_id;
4005     struct object_surface *obj_surface;
4006     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
4008     VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4009
4010     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011     unsigned char qp = 0;
4012     unsigned char me_method = 0;
4013     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014     unsigned int table_idx = 0;
4015     int ref_width, ref_height, len_sp;
4016     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018     unsigned int preset = generic_state->preset;
4019
4020     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4021
4022     assert(gpe_context != NULL);
4023     cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024     memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
4025
4026     if (mbenc_i_frame_dist_in_use) {
4027         memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4028
4029     } else {
4030         switch (generic_state->frame_type) {
4031         case SLICE_TYPE_I:
4032             memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4033             break;
4034         case SLICE_TYPE_P:
4035             memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4036             break;
4037         case SLICE_TYPE_B:
4038             memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4039             break;
4040         default:
4041             assert(0);
4042         }
4043
4044     }
4045     /* 4 means full search, 6 means diamond search */
4046     me_method  = (fei_param->search_window == 5) ||
4047                  (fei_param->search_window == 8) ? 4 : 6;
4048
4049     ref_width    = fei_param->ref_width;
4050     ref_height   = fei_param->ref_height;
4051     len_sp       = fei_param->len_sp;
4052     /* If there is a search_window, discard user-provided ref_width, ref_height
4053      * and search_path length */
4054     switch (fei_param->search_window) {
4055     case 0:
4056         /*  not use predefined search window, there should be a search_path input */
4057         if ((fei_param->search_path != 0) &&
4058             (fei_param->search_path != 1) &&
4059             (fei_param->search_path != 2)) {
4060             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
4061             assert(0);
4062         }
4063         /* 4 means full search, 6 means diamond search */
4064         me_method = (fei_param->search_path == 1) ? 6 : 4;
4065         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066             WARN_ONCE("Invalid input ref_width/ref_height in"
4067                       "SearchWindow=0 case! \n");
4068             assert(0);
4069         }
4070         break;
4071
4072     case 1:
4073         /* Tiny - 4 SUs 24x24 window */
4074         ref_width  = 24;
4075         ref_height = 24;
4076         len_sp     = 4;
4077         break;
4078
4079     case 2:
4080         /* Small - 9 SUs 28x28 window */
4081         ref_width  = 28;
4082         ref_height = 28;
4083         len_sp     = 9;
4084         break;
4085     case 3:
4086         /* Diamond - 16 SUs 48x40 window */
4087         ref_width  = 48;
4088         ref_height = 40;
4089         len_sp     = 16;
4090         break;
4091     case 4:
4092         /* Large Diamond - 32 SUs 48x40 window */
4093         ref_width  = 48;
4094         ref_height = 40;
4095         len_sp     = 32;
4096         break;
4097     case 5:
4098         /* Exhaustive - 48 SUs 48x40 window */
4099         ref_width  = 48;
4100         ref_height = 40;
4101         len_sp     = 48;
4102         break;
4103     case 6:
4104         /* Diamond - 16 SUs 64x32 window */
4105         ref_width  = 64;
4106         ref_height = 32;
4107         len_sp     = 16;
4108         break;
4109     case 7:
4110         /* Large Diamond - 32 SUs 64x32 window */
4111         ref_width  = 64;
4112         ref_height = 32;
4113         len_sp     = 32;
4114         break;
4115     case 8:
4116         /* Exhaustive - 48 SUs 64x32 window */
4117         ref_width  = 64;
4118         ref_height = 32;
4119         len_sp     = 48;
4120         break;
4121
4122     default:
4123         assert(0);
4124     }
4125
4126     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4127     if (is_bframe) {
4128         CLIP(ref_width, 4, 32);
4129         CLIP(ref_height, 4, 32);
4130     } else if (is_pframe) {
4131         CLIP(ref_width, 4, 64);
4132         CLIP(ref_height, 4, 32);
4133     }
4134
4135     cmd->dw0.adaptive_enable =
4136         cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137     cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138                                           = avc_state->transform_8x8_mode_enable;
4139     cmd->dw2.max_len_sp = len_sp;
4140     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142     cmd->dw3.src_access =
4143         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
4144
4145     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146         if (avc_state->ftq_override) {
4147             cmd->dw3.ft_enable = avc_state->ftq_enable;
4148         } else {
4149             if (generic_state->frame_type == SLICE_TYPE_P) {
4150                 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4151             } else {
4152                 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4153             }
4154         }
4155     } else {
4156         cmd->dw3.ft_enable = 0;
4157     }
4158
4159     if (avc_state->disable_sub_mb_partion)
4160         cmd->dw3.sub_mb_part_mask = 0x7;
4161
4162     if (mbenc_i_frame_dist_in_use) {
4163         /* Fixme: Not supported, no brc in fei */
4164         assert(0);
4165         cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166         cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168         cmd->dw6.batch_buffer_end = 0;
4169         cmd->dw31.intra_compute_type = 1;
4170     }
4171
4172     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173     cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174     cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175                                generic_state->frame_height_in_mbs : avc_state->slice_height;
4176     cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177     cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178     cmd->dw3.inter_sad = fei_param->inter_sad;
4179     cmd->dw3.Intra_sad = fei_param->intra_sad;
4180     cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181     cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182     cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183         avc_state->sfd_enable && generic_state->hme_enabled;
4184     cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185     cmd->dw4.constrained_intra_pred_flag =
4186         pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187     cmd->dw4.hme_enable = 0;
4188     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189     cmd->dw4.use_actual_ref_qp_value =
4190         generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191     cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192     cmd->dw7.src_field_polarity = 0;
4193
4194     /* mv mode cost */
4195     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197         // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198     } else if (avc_state->skip_bias_adjustment_enable) {
4199         // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200         // No need to check for P picture as the flag is only enabled for P picture
4201         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4202     }
4203
4204     //dw16
4205     /* search path tables */
4206     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4208
4209     //ftq_skip_threshold_lut set,dw14 /15
4210
4211     //r5 disable NonFTQSkipThresholdLUT
4212     if (generic_state->frame_type == SLICE_TYPE_P) {
4213         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215         cmd->dw32.skip_val =
4216             gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4217     }
4218     cmd->dw13.qp_prime_y = qp;
4219     cmd->dw13.qp_prime_cb = qp;
4220     cmd->dw13.qp_prime_cr = qp;
4221     cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
4222
4223     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224         cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225         cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4226     } else {
4227         /* disable */
4228         cmd->dw32.mult_pred_l0_disable = 0x80;
4229         cmd->dw32.mult_pred_l1_disable = 0x80;
4230     }
4231     /* no field pic setting, not supported */
4232
4233     //dw34 58
4234     if (avc_state->adaptive_transform_decision_enable) {
4235         if (generic_state->frame_type != SLICE_TYPE_I) {
4236             cmd->dw34.enable_adaptive_tx_decision = 1;
4237         }
4238
4239         cmd->dw58.mb_texture_threshold = 1024;
4240         cmd->dw58.tx_decision_threshold = 128;
4241     }
4242     if (generic_state->frame_type == SLICE_TYPE_B) {
4243         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244         cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4246     }
4247     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248     cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4253
4254     if (cmd->dw34.force_non_skip_check) {
4255         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4256     }
4257     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258     cmd->dw38.ref_threshold = 400;
4259     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260     // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
4261     // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262     // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4264     if (mbenc_i_frame_dist_in_use) {
4265         cmd->dw13.qp_prime_y = 0;
4266         cmd->dw13.qp_prime_cb = 0;
4267         cmd->dw13.qp_prime_cr = 0;
4268         cmd->dw33.intra_16x16_nondc_penalty = 0;
4269         cmd->dw33.intra_8x8_nondc_penalty = 0;
4270         cmd->dw33.intra_4x4_nondc_penalty = 0;
4271     }
4272     if (cmd->dw4.use_actual_ref_qp_value) {
4273         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4283     }
4284
4285     table_idx = slice_type_kernel[generic_state->frame_type];
4286     cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
4287
4288     if (generic_state->frame_type == SLICE_TYPE_I) {
4289         cmd->dw0.skip_mode_enable = 0;
4290         cmd->dw37.skip_mode_enable = 0;
4291         cmd->dw36.hme_combine_overlap = 0;
4292         cmd->dw36.check_all_fractional_enable = 0;
4293         cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294         cmd->dw34.enable_direct_bias_adjustment = 0;
4295         cmd->dw34.enable_global_motion_bias_adjustment = 0;
4296
4297     } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299         cmd->dw3.bme_disable_fbr = 1;
4300         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302         cmd->dw7.non_skip_zmv_added = 1;
4303         cmd->dw7.non_skip_mode_added = 1;
4304         cmd->dw7.skip_center_mask = 1;
4305
4306         cmd->dw47.intra_cost_sf =
4307             (avc_state->adaptive_intra_scaling_enable) ?
4308             gen9_avc_adaptive_intra_scaling_factor[preset] :
4309             gen9_avc_intra_scaling_factor[preset];
4310
4311         cmd->dw47.max_vmv_r =
4312             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4313
4314         cmd->dw36.hme_combine_overlap = 1;
4315         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317         cmd->dw34.enable_direct_bias_adjustment = 0;
4318         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319         if (avc_state->global_motion_bias_adjustment_enable)
4320             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4321
4322         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4323
4324     } else { /* B slice */
4325
4326         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327         cmd->dw1.bi_Weight = avc_state->bi_weight;
4328         cmd->dw3.search_ctrl = 7;
4329         cmd->dw3.skip_type = 1;
4330         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332         cmd->dw7.skip_center_mask = 0xff;
4333
4334         cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335                                   gen9_avc_adaptive_intra_scaling_factor[qp] :
4336                                   gen9_avc_intra_scaling_factor[qp];
4337
4338         cmd->dw47.max_vmv_r =
4339             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4340
4341         cmd->dw36.hme_combine_overlap = 1;
4342
4343         //check is_fwd_frame_short_term_ref
4344         surface_id = slice_param->RefPicList1[0].picture_id;
4345         obj_surface = SURFACE(surface_id);
4346         if (!obj_surface) {
4347             WARN_ONCE("Invalid backward reference frame\n");
4348             if (gpe_context)
4349                 i965_gpe_context_unmap_curbe(gpe_context);
4350             return;
4351         }
4352         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4353
4354         cmd->dw36.num_ref_idx_l0_minus_one =
4355             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4356             : 0;
4357         cmd->dw36.num_ref_idx_l1_minus_one =
4358             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4359             : 0;
4360         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4361
4362         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4370
4371         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372         if (cmd->dw34.enable_direct_bias_adjustment) {
4373             cmd->dw7.non_skip_mode_added = 1;
4374             cmd->dw7.non_skip_zmv_added = 1;
4375         }
4376
4377         cmd->dw34.enable_global_motion_bias_adjustment =
4378             avc_state->global_motion_bias_adjustment_enable;
4379         if (avc_state->global_motion_bias_adjustment_enable)
4380             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4381
4382         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383         cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4384     }
4385
4386     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4387
4388     if (avc_state->rolling_intra_refresh_enable) {
4389         //Not supported
4390         cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4391
4392     } else {
4393         cmd->dw34.widi_intra_refresh_en = 0;
4394     }
4395     cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396     cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4397
4398     /* Fixme: Skipped ROI stuffs for now */
4399
4400     /* r64: FEI specific fields */
4401     cmd->dw64.fei_enable = 1;
4402     cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403     if (fei_param->distortion != VA_INVALID_ID)
4404         cmd->dw64.vme_distortion_output_enable = 1;
4405     cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406     cmd->dw64.mb_input_enable = fei_param->mb_input;
4407
4408     // FEI mode is disabled when external MVP is available
4409     if (fei_param->mv_predictor_enable)
4410         cmd->dw64.fei_mode = 0;
4411     else
4412         cmd->dw64.fei_mode = 1;
4413
4414     cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415     cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416     cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417     cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418     cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419     cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420     cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421     cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422     cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423     cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424     cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425     cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426     cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427     cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428     cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429     cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430     cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431     cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432     cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433     cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434     cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435     cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436     cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437     i965_gpe_context_unmap_curbe(gpe_context);
4438
4439     return;
4440 }
4441
/*
 * Bind every input/output surface required by the MbEnc (macroblock
 * encoding) GPU kernel into the GPE context's binding table.
 *
 * The binding-table slot used for each surface is fixed by the kernel ABI
 * (the GEN9_AVC_MBENC_*_INDEX constants); the order of the calls below is
 * therefore significant only in that each surface must land in its slot.
 *
 * param_mbenc is a struct mbenc_param describing this invocation; when
 * param->mbenc_i_frame_dist_in_use is set the kernel runs in the
 * I-frame-distortion (BRC init) mode and the 4x-downscaled reconstructed
 * surface is used as the "current" picture instead of the raw input YUV.
 *
 * Returns silently (no error reported) if a required surface or its
 * private data is missing.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    /* total number of macroblocks in the frame */
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Gen9.5 platforms (KBL/GLK, and Gen10 here) use a few different
     * binding-table indices than plain Gen9 (SKL/BXT). */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /* PAK object command buffer output (MB code): 16 dwords per MB */
    size = frame_mb_size * 16 * 4;
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /* MV data buffer output: 32 dwords per MB */
    size = frame_mb_size * 32 * 4;
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /* Current picture input (2D view): in I-frame-distortion mode use the
     * 4x-downscaled reconstructed surface, otherwise the raw input YUV. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    /* Y plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    /* interleaved UV plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /* 4x HME MV input; on Gen8 these are only bound for non-I frames. */
        if (!IS_GEN8(i965->intel.device_info)) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* 4x HME distortion input */
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        } else if (generic_state->frame_type != SLICE_TYPE_I) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* 4x HME distortion input */
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        }
    }

    /* MBBRC constant data buffer: 16 dwords per QP value */
    if (param->mb_const_data_buffer_in_use) {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /* Per-MB QP data buffer: application-provided MB QP map when enabled,
     * otherwise the MBBRC-generated QP surface. */
    if (param->mb_qp_buffer_in_use) {
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /* Current picture for VME (adv/media surface view), same selection
     * between scaled 4x reconstructed vs. raw input as above. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /* L0 reference pictures: interleaved into odd slots after current pic */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /* Current picture again for the second VME state (list 1 side) */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        /* the backward ref is also bound on the list-0 side */
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            /* For B frames, bind the first backward reference's MB code and
             * MV data surfaces as forward-frame inputs (used for direct/skip
             * mode derivation). */
            avc_priv_surface = obj_surface->private_data;
            /* PAK object command buffer of the fwd frame (MB code) */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /* MV data buffer of the fwd frame */
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer, only for the I-frame-distortion pass */
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* as ref frame, update later RefPicSelect of current picture */
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }
    /* MB stats slot: VPP MB statistics when available (Gen9+), otherwise
     * the flatness-check surface shares the same binding index. */
    if (!IS_GEN8(i965->intel.device_info)) {
        if (param->mb_vproc_stats_enable) {
            /* MB status buffer input: 16 dwords per MB */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &(avc_ctx->res_mb_status_buffer);
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_MB_STATS_INDEX);

        } else if (avc_state->flatness_check_enable) {
            gpe_resource = &(avc_ctx->res_flatness_check_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MB_STATS_INDEX);
        }
    } else if (avc_state->flatness_check_enable) {
        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if (param->mad_enable) {
        /* MAD (mean absolute difference) accumulator: a single dword,
         * zeroed before the kernel runs so it can accumulate into it. */
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /* BRC-updated mbenc curbe data buffer; ignored by Gen9, used on Gen9.5 */
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /* arbitrary number of MBs per slice */
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /* slice map surface input */
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* Surfaces only needed for the normal encoding pass (not the
     * I-frame-distortion pass): force-non-skip MB map and the static
     * frame detection (SFD) input/output. */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }
        if (IS_GEN8(i965->intel.device_info)) {
            if (avc_state->sfd_enable) {
                /* NOTE(review): 128 / sizeof(unsigned long) then /4 dwords —
                 * matches the Gen8 SFD output layout expected by the kernel;
                 * the unusual arithmetic is kept as-is. */
                size = 128 / sizeof(unsigned long);
                gpe_resource = &(avc_ctx->res_sfd_output_buffer);
                i965_add_buffer_gpe_surface(ctx,
                                            gpe_context,
                                            gpe_resource,
                                            0,
                                            size / 4,
                                            0,
                                            GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);

            }
        } else {
            /* Gen9+: SFD cost table input, per frame type (P or B only) */
            if (avc_state->sfd_enable && generic_state->hme_enabled) {
                if (generic_state->frame_type == SLICE_TYPE_P) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
                } else if (generic_state->frame_type == SLICE_TYPE_B) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
                }
                if (generic_state->frame_type != SLICE_TYPE_I) {
                    size = 64;
                    i965_add_buffer_gpe_surface(ctx,
                                                gpe_context,
                                                gpe_resource,
                                                0,
                                                size / 4,
                                                0,
                                                (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));


                }
            }
        }
    }
    return;
}
4812
4813 static void
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815                                 struct encode_state *encode_state,
4816                                 struct i965_gpe_context *gpe_context,
4817                                 struct intel_encoder_context *encoder_context,
4818                                 void * param_mbenc)
4819 {
4820     struct i965_driver_data *i965 = i965_driver_data(ctx);
4821     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826     struct object_buffer *obj_buffer = NULL;
4827     struct buffer_store *buffer_store = NULL;
4828     struct object_surface *obj_surface = NULL;
4829     struct gen9_surface_avc *avc_priv_surface;
4830     struct i965_gpe_resource *gpe_resource;
4831     VASurfaceID surface_id;
4832     unsigned int size = 0;
4833     unsigned int frame_mb_nums;
4834     int i = 0, allocate_flag = 1;
4835
4836     obj_surface = encode_state->reconstructed_object;
4837     if (!obj_surface || !obj_surface->private_data)
4838         return;
4839     avc_priv_surface = obj_surface->private_data;
4840
4841     frame_mb_nums = generic_state->frame_width_in_mbs *
4842                     generic_state->frame_height_in_mbs;
4843     fei_param = avc_state->fei_framectl_param;
4844
4845     assert(fei_param != NULL);
4846
4847     /* res_mb_code_surface for MB code */
4848     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4849     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851     if (fei_param->mb_code_data != VA_INVALID_ID) {
4852         obj_buffer = BUFFER(fei_param->mb_code_data);
4853         assert(obj_buffer != NULL);
4854         buffer_store = obj_buffer->buffer_store;
4855         assert(size <= buffer_store->bo->size);
4856         i965_dri_object_to_buffer_gpe_resource(
4857             &avc_priv_surface->res_mb_code_surface,
4858             buffer_store->bo);
4859     } else {
4860         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861                                                    &avc_priv_surface->res_mb_code_surface,
4862                                                    ALIGN(size, 0x1000),
4863                                                    "mb code buffer");
4864         assert(allocate_flag != 0);
4865     }
4866
4867     /* res_mv_data_surface for MV data */
4868     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869     if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870         i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871     if (fei_param->mv_data != VA_INVALID_ID) {
4872         obj_buffer = BUFFER(fei_param->mv_data);
4873         assert(obj_buffer != NULL);
4874         buffer_store = obj_buffer->buffer_store;
4875         assert(size <= buffer_store->bo->size);
4876         i965_dri_object_to_buffer_gpe_resource(
4877             &avc_priv_surface->res_mv_data_surface,
4878             buffer_store->bo);
4879     } else {
4880         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881                                                    &avc_priv_surface->res_mv_data_surface,
4882                                                    ALIGN(size, 0x1000),
4883                                                    "mv data buffer");
4884         assert(allocate_flag != 0);
4885     }
4886
4887     /* fei mb control data surface */
4888     size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4889     if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890         assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891         obj_buffer = BUFFER(fei_param->mb_ctrl);
4892         assert(obj_buffer != NULL);
4893         buffer_store = obj_buffer->buffer_store;
4894         assert(size <= buffer_store->bo->size);
4895         if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897         i965_dri_object_to_buffer_gpe_resource(
4898             &avc_priv_surface->res_fei_mb_cntrl_surface,
4899             buffer_store->bo);
4900     }
4901
4902     /* fei mv predictor surface*/
4903     size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904     if (fei_param->mv_predictor_enable &&
4905         (fei_param->mv_predictor != VA_INVALID_ID)) {
4906         obj_buffer = BUFFER(fei_param->mv_predictor);
4907         assert(obj_buffer != NULL);
4908         buffer_store = obj_buffer->buffer_store;
4909         assert(size <= buffer_store->bo->size);
4910         if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911             i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912         i965_dri_object_to_buffer_gpe_resource(
4913             &avc_priv_surface->res_fei_mv_predictor_surface,
4914             buffer_store->bo);
4915     } else {
4916         if (fei_param->mv_predictor_enable)
4917             assert(fei_param->mv_predictor != VA_INVALID_ID);
4918     }
4919
4920     /* fei vme distortion */
4921     size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922     if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923         i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924     if (fei_param->distortion != VA_INVALID_ID) {
4925         obj_buffer = BUFFER(fei_param->distortion);
4926         assert(obj_buffer != NULL);
4927         buffer_store = obj_buffer->buffer_store;
4928         assert(size <= buffer_store->bo->size);
4929         i965_dri_object_to_buffer_gpe_resource(
4930             &avc_priv_surface->res_fei_vme_distortion_surface,
4931             buffer_store->bo);
4932     } else {
4933         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934                                                    &avc_priv_surface->res_fei_vme_distortion_surface,
4935                                                    ALIGN(size, 0x1000),
4936                                                    "fei vme distortion");
4937         assert(allocate_flag != 0);
4938     }
4939
4940     /* fei mb qp  */
4941     /* Fixme/Confirm:  not sure why we need 3 byte padding here */
4942     size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944         obj_buffer = BUFFER(fei_param->qp);
4945         assert(obj_buffer != NULL);
4946         buffer_store = obj_buffer->buffer_store;
4947         assert((size - 3) <= buffer_store->bo->size);
4948         if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950         i965_dri_object_to_buffer_gpe_resource(
4951             &avc_priv_surface->res_fei_mb_qp_surface,
4952             buffer_store->bo);
4953     } else {
4954         if (fei_param->mb_qp)
4955             assert(fei_param->qp != VA_INVALID_ID);
4956     }
4957
4958     /*==== pak obj command buffer output ====*/
4959     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961     i965_add_buffer_gpe_surface(ctx,
4962                                 gpe_context,
4963                                 gpe_resource,
4964                                 0,
4965                                 size / 4,
4966                                 0,
4967                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4968
4969
4970     /*=== mv data buffer output */
4971     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973     i965_add_buffer_gpe_surface(ctx,
4974                                 gpe_context,
4975                                 gpe_resource,
4976                                 0,
4977                                 size / 4,
4978                                 0,
4979                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4980
4981
4982     /* === current input Y (binding table offset = 3)=== */
4983     obj_surface = encode_state->input_yuv_object;
4984     i965_add_2d_gpe_surface(ctx,
4985                             gpe_context,
4986                             obj_surface,
4987                             0,
4988                             1,
4989                             I965_SURFACEFORMAT_R8_UNORM,
4990                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4991
4992     /* === current input UV === (binding table offset == 4)*/
4993     i965_add_2d_gpe_surface(ctx,
4994                             gpe_context,
4995                             obj_surface,
4996                             1,
4997                             1,
4998                             I965_SURFACEFORMAT_R16_UINT,
4999                             GEN9_AVC_MBENC_CURR_UV_INDEX);
5000
5001     /* === input current YUV surface, (binding table offset == 15) === */
5002     i965_add_adv_gpe_surface(ctx, gpe_context,
5003                              obj_surface,
5004                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5005
5006
5007     /*== input current  YUV surface, (binding table offset == 32)*/
5008     i965_add_adv_gpe_surface(ctx, gpe_context,
5009                              obj_surface,
5010                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5011
5012     /* list 0 references */
5013     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5014
5015         surface_id = slice_param->RefPicList0[i].picture_id;
5016         obj_surface = SURFACE(surface_id);
5017         if (!obj_surface || !obj_surface->private_data)
5018             break;
5019         i965_add_adv_gpe_surface(ctx, gpe_context,
5020                                  obj_surface,
5021                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5022     }
5023
5024
5025     /* list 1 references */
5026     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027         if (i > 0) break; // only  one ref supported here for B frame
5028         surface_id = slice_param->RefPicList1[i].picture_id;
5029         obj_surface = SURFACE(surface_id);
5030         if (!obj_surface || !obj_surface->private_data)
5031             break;
5032
5033         i965_add_adv_gpe_surface(ctx, gpe_context,
5034                                  obj_surface,
5035                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5036         if (i == 0) {
5037             avc_priv_surface = obj_surface->private_data;
5038             /* mb code of Backward reference frame */
5039             size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040             gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041             i965_add_buffer_gpe_surface(ctx,
5042                                         gpe_context,
5043                                         gpe_resource,
5044                                         0,
5045                                         size / 4,
5046                                         0,
5047                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5048
5049             /* mv data of backward ref frame */
5050             size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051             gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052             i965_add_buffer_gpe_surface(ctx,
5053                                         gpe_context,
5054                                         gpe_resource,
5055                                         0,
5056                                         size / 4,
5057                                         0,
5058                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5059
5060         }
5061         //again
5062         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063             i965_add_adv_gpe_surface(ctx, gpe_context,
5064                                      obj_surface,
5065                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5066         }
5067     }
5068
5069     /* as ref frame ,update later RefPicSelect of Current Picture*/
5070     obj_surface = encode_state->reconstructed_object;
5071     avc_priv_surface = obj_surface->private_data;
5072     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5075                                        gpe_resource,
5076                                        1,
5077                                        I965_SURFACEFORMAT_R8_UNORM,
5078                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5079
5080     }
5081
5082
5083     /* mb specific data, macroblock control parameters */
5084     if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085         (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086         size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087         gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088         i965_add_buffer_gpe_surface(ctx,
5089                                     gpe_context,
5090                                     gpe_resource,
5091                                     0,
5092                                     size / 4,
5093                                     0,
5094                                     GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5095     }
5096
5097     /* multi mv predictor surface */
5098     if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099         size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100         gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101         i965_add_buffer_gpe_surface(ctx,
5102                                     gpe_context,
5103                                     gpe_resource,
5104                                     0,
5105                                     size / 4,
5106                                     0,
5107                                     GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5108     }
5109
5110     /* mb qp */
5111     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112         size = frame_mb_nums  + 3;
5113         gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114         i965_add_buffer_gpe_surface(ctx,
5115                                     gpe_context,
5116                                     gpe_resource,
5117                                     0,
5118                                     size / 4,
5119                                     0,
5120                                     GEN9_AVC_MBENC_MBQP_INDEX);
5121     }
5122
5123
5124     /*=== FEI distortion surface ====*/
5125     size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126     gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127     i965_add_buffer_gpe_surface(ctx,
5128                                 gpe_context,
5129                                 gpe_resource,
5130                                 0,
5131                                 size / 4,
5132                                 0,
5133                                 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
5134
5135     return;
5136 }
5137
/*
 * Run the MBEnc (macroblock encode) kernel for the current frame.
 *
 * When i_frame_dist_in_use is true this instead runs the BRC I-frame
 * distortion kernel on the 4x-downscaled surface; otherwise it selects
 * the normal/performance/quality MBEnc kernel variant (or the FEI
 * variant) for the current slice type, sets up CURBE and surfaces, and
 * dispatches it through the media-object walker.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty-ROI is deliberately disabled here (the trailing "&& (0)") */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* decide which optional per-MB input buffers the kernel will read */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion path: runs on the 4x-downscaled frame,
         * MAD and BRC are not used */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* pick the kernel variant by quality/performance preset, then
         * offset by slice type (base index is the I-frame kernel) */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* FEI overrides the preset choice with its own kernel set */
        if (encoder_context->fei_enabled) {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
        }

        /* kernels are laid out I, P, B consecutively */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* skip (re)init when the BRC-update kernel already programmed CURBE
     * for this pass */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker pattern follows MB dependency: 45 degree for intra,
         * 26 degree for inter (except B with temporal direct MV pred) */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
5308
5309 /*
5310 me kernle related function
5311 */
5312 static void
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314                       struct encode_state *encode_state,
5315                       struct i965_gpe_context *gpe_context,
5316                       struct intel_encoder_context *encoder_context,
5317                       void * param)
5318 {
5319     gen9_avc_me_curbe_data *curbe_cmd;
5320     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5323
5324     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5325
5326     struct me_param * curbe_param = (struct me_param *)param ;
5327     unsigned char  use_mv_from_prev_step = 0;
5328     unsigned char write_distortions = 0;
5329     unsigned char qp_prime_y = 0;
5330     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5331     unsigned char seach_table_idx = 0;
5332     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334     unsigned int scale_factor = 0;
5335
5336     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5337     switch (curbe_param->hme_type) {
5338     case INTEL_ENC_HME_4x : {
5339         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340         write_distortions = 1;
5341         mv_shift_factor = 2;
5342         scale_factor = 4;
5343         prev_mv_read_pos_factor = 0;
5344         break;
5345     }
5346     case INTEL_ENC_HME_16x : {
5347         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348         write_distortions = 0;
5349         mv_shift_factor = 2;
5350         scale_factor = 16;
5351         prev_mv_read_pos_factor = 1;
5352         break;
5353     }
5354     case INTEL_ENC_HME_32x : {
5355         use_mv_from_prev_step = 0;
5356         write_distortions = 0;
5357         mv_shift_factor = 1;
5358         scale_factor = 32;
5359         prev_mv_read_pos_factor = 0;
5360         break;
5361     }
5362     default:
5363         assert(0);
5364
5365     }
5366     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5367
5368     if (!curbe_cmd)
5369         return;
5370
5371     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5373
5374     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5375
5376     curbe_cmd->dw3.sub_pel_mode = 3;
5377     if (avc_state->field_scaling_output_interleaved) {
5378         /*frame set to zero,field specified*/
5379         curbe_cmd->dw3.src_access = 0;
5380         curbe_cmd->dw3.ref_access = 0;
5381         curbe_cmd->dw7.src_field_polarity = 0;
5382     }
5383     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5386
5387     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388     curbe_cmd->dw6.write_distortions = write_distortions;
5389     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5391
5392     if (generic_state->frame_type == SLICE_TYPE_B) {
5393         curbe_cmd->dw1.bi_weight = 32;
5394         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395         me_method = gen9_avc_b_me_method[generic_state->preset];
5396         seach_table_idx = 1;
5397     }
5398
5399     if (generic_state->frame_type == SLICE_TYPE_P ||
5400         generic_state->frame_type == SLICE_TYPE_B)
5401         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5402
5403     curbe_cmd->dw13.ref_streamin_cost = 5;
5404     curbe_cmd->dw13.roi_enable = 0;
5405
5406     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
5408
5409     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5410
5411     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5418
5419     i965_gpe_context_unmap_curbe(gpe_context);
5420     return;
5421 }
5422
5423 static void
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425                          struct encode_state *encode_state,
5426                          struct i965_gpe_context *gpe_context,
5427                          struct intel_encoder_context *encoder_context,
5428                          void * param)
5429 {
5430     struct i965_driver_data *i965 = i965_driver_data(ctx);
5431
5432     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5436
5437     struct object_surface *obj_surface, *input_surface;
5438     struct gen9_surface_avc *avc_priv_surface;
5439     struct i965_gpe_resource *gpe_resource;
5440     struct me_param * curbe_param = (struct me_param *)param ;
5441
5442     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443     VASurfaceID surface_id;
5444     int i = 0;
5445
5446     /* all scaled input surface stored in reconstructed_object*/
5447     obj_surface = encode_state->reconstructed_object;
5448     if (!obj_surface || !obj_surface->private_data)
5449         return;
5450     avc_priv_surface = obj_surface->private_data;
5451
5452
5453     switch (curbe_param->hme_type) {
5454     case INTEL_ENC_HME_4x : {
5455         /*memv output 4x*/
5456         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5458                                        gpe_resource,
5459                                        1,
5460                                        I965_SURFACEFORMAT_R8_UNORM,
5461                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5462
5463         /*memv input 16x*/
5464         if (generic_state->b16xme_enabled) {
5465             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5467                                            gpe_resource,
5468                                            1,
5469                                            I965_SURFACEFORMAT_R8_UNORM,
5470                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5471         }
5472         /* brc distortion  output*/
5473         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5475                                        gpe_resource,
5476                                        1,
5477                                        I965_SURFACEFORMAT_R8_UNORM,
5478                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479         /* memv distortion output*/
5480         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5482                                        gpe_resource,
5483                                        1,
5484                                        I965_SURFACEFORMAT_R8_UNORM,
5485                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486         /*input current down scaled YUV surface*/
5487         obj_surface = encode_state->reconstructed_object;
5488         avc_priv_surface = obj_surface->private_data;
5489         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490         i965_add_adv_gpe_surface(ctx, gpe_context,
5491                                  input_surface,
5492                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493         /*input ref scaled YUV surface*/
5494         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495             surface_id = slice_param->RefPicList0[i].picture_id;
5496             obj_surface = SURFACE(surface_id);
5497             if (!obj_surface || !obj_surface->private_data)
5498                 break;
5499             avc_priv_surface = obj_surface->private_data;
5500
5501             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5502
5503             i965_add_adv_gpe_surface(ctx, gpe_context,
5504                                      input_surface,
5505                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5506         }
5507
5508         obj_surface = encode_state->reconstructed_object;
5509         avc_priv_surface = obj_surface->private_data;
5510         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5511
5512         i965_add_adv_gpe_surface(ctx, gpe_context,
5513                                  input_surface,
5514                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5515
5516         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517             surface_id = slice_param->RefPicList1[i].picture_id;
5518             obj_surface = SURFACE(surface_id);
5519             if (!obj_surface || !obj_surface->private_data)
5520                 break;
5521             avc_priv_surface = obj_surface->private_data;
5522
5523             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5524
5525             i965_add_adv_gpe_surface(ctx, gpe_context,
5526                                      input_surface,
5527                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5528         }
5529         break;
5530
5531     }
5532     case INTEL_ENC_HME_16x : {
5533         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5535                                        gpe_resource,
5536                                        1,
5537                                        I965_SURFACEFORMAT_R8_UNORM,
5538                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5539
5540         if (generic_state->b32xme_enabled) {
5541             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5543                                            gpe_resource,
5544                                            1,
5545                                            I965_SURFACEFORMAT_R8_UNORM,
5546                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5547         }
5548
5549         obj_surface = encode_state->reconstructed_object;
5550         avc_priv_surface = obj_surface->private_data;
5551         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552         i965_add_adv_gpe_surface(ctx, gpe_context,
5553                                  input_surface,
5554                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5555
5556         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557             surface_id = slice_param->RefPicList0[i].picture_id;
5558             obj_surface = SURFACE(surface_id);
5559             if (!obj_surface || !obj_surface->private_data)
5560                 break;
5561             avc_priv_surface = obj_surface->private_data;
5562
5563             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5564
5565             i965_add_adv_gpe_surface(ctx, gpe_context,
5566                                      input_surface,
5567                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5568         }
5569
5570         obj_surface = encode_state->reconstructed_object;
5571         avc_priv_surface = obj_surface->private_data;
5572         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5573
5574         i965_add_adv_gpe_surface(ctx, gpe_context,
5575                                  input_surface,
5576                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5577
5578         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579             surface_id = slice_param->RefPicList1[i].picture_id;
5580             obj_surface = SURFACE(surface_id);
5581             if (!obj_surface || !obj_surface->private_data)
5582                 break;
5583             avc_priv_surface = obj_surface->private_data;
5584
5585             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5586
5587             i965_add_adv_gpe_surface(ctx, gpe_context,
5588                                      input_surface,
5589                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5590         }
5591         break;
5592     }
5593     case INTEL_ENC_HME_32x : {
5594         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5596                                        gpe_resource,
5597                                        1,
5598                                        I965_SURFACEFORMAT_R8_UNORM,
5599                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5600
5601         obj_surface = encode_state->reconstructed_object;
5602         avc_priv_surface = obj_surface->private_data;
5603         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604         i965_add_adv_gpe_surface(ctx, gpe_context,
5605                                  input_surface,
5606                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5607
5608         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609             surface_id = slice_param->RefPicList0[i].picture_id;
5610             obj_surface = SURFACE(surface_id);
5611             if (!obj_surface || !obj_surface->private_data)
5612                 break;
5613             avc_priv_surface = obj_surface->private_data;
5614
5615             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5616
5617             i965_add_adv_gpe_surface(ctx, gpe_context,
5618                                      input_surface,
5619                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5620         }
5621
5622         obj_surface = encode_state->reconstructed_object;
5623         avc_priv_surface = obj_surface->private_data;
5624         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5625
5626         i965_add_adv_gpe_surface(ctx, gpe_context,
5627                                  input_surface,
5628                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5629
5630         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631             surface_id = slice_param->RefPicList1[i].picture_id;
5632             obj_surface = SURFACE(surface_id);
5633             if (!obj_surface || !obj_surface->private_data)
5634                 break;
5635             avc_priv_surface = obj_surface->private_data;
5636
5637             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5638
5639             i965_add_adv_gpe_surface(ctx, gpe_context,
5640                                      input_surface,
5641                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5642         }
5643         break;
5644     }
5645     default:
5646         assert(0);
5647
5648     }
5649 }
5650
5651 static VAStatus
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653                    struct encode_state *encode_state,
5654                    struct intel_encoder_context *encoder_context,
5655                    int hme_type)
5656 {
5657     struct i965_driver_data *i965 = i965_driver_data(ctx);
5658     struct i965_gpe_table *gpe = &i965->gpe_table;
5659     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5663
5664     struct i965_gpe_context *gpe_context;
5665     struct gpe_media_object_walker_parameter media_object_walker_param;
5666     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668     int media_function = 0;
5669     int kernel_idx = 0;
5670     struct me_param param ;
5671     unsigned int scale_factor = 0;
5672
5673     switch (hme_type) {
5674     case INTEL_ENC_HME_4x : {
5675         media_function = INTEL_MEDIA_STATE_4X_ME;
5676         scale_factor = 4;
5677         break;
5678     }
5679     case INTEL_ENC_HME_16x : {
5680         media_function = INTEL_MEDIA_STATE_16X_ME;
5681         scale_factor = 16;
5682         break;
5683     }
5684     case INTEL_ENC_HME_32x : {
5685         media_function = INTEL_MEDIA_STATE_32X_ME;
5686         scale_factor = 32;
5687         break;
5688     }
5689     default:
5690         assert(0);
5691
5692     }
5693
5694     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5696
5697     /* I frame should not come here.*/
5698     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5700
5701     gpe->context_init(ctx, gpe_context);
5702     gpe->reset_binding_table(ctx, gpe_context);
5703
5704     /*set curbe*/
5705     memset(&param, 0, sizeof(param));
5706     param.hme_type = hme_type;
5707     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5708
5709     /*send surface*/
5710     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5711
5712     gpe->setup_interface_data(ctx, gpe_context);
5713
5714     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715     /* the scaling is based on 8x8 blk level */
5716     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5718     kernel_walker_param.no_dependency = 1;
5719
5720     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5721
5722     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5723                                             gpe_context,
5724                                             media_function,
5725                                             &media_object_walker_param);
5726
5727     return VA_STATUS_SUCCESS;
5728 }
5729
5730 /*
5731 wp related function
5732 */
5733 static void
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735                       struct encode_state *encode_state,
5736                       struct i965_gpe_context *gpe_context,
5737                       struct intel_encoder_context *encoder_context,
5738                       void * param)
5739 {
5740     gen9_avc_wp_curbe_data *cmd;
5741     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744     struct wp_param * curbe_param = (struct wp_param *)param;
5745
5746     cmd = i965_gpe_context_map_curbe(gpe_context);
5747
5748     if (!cmd)
5749         return;
5750     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5751     if (curbe_param->ref_list_idx) {
5752         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5754     } else {
5755         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5757     }
5758
5759     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5761
5762     i965_gpe_context_unmap_curbe(gpe_context);
5763
5764 }
5765
5766 static void
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768                          struct encode_state *encode_state,
5769                          struct i965_gpe_context *gpe_context,
5770                          struct intel_encoder_context *encoder_context,
5771                          void * param)
5772 {
5773     struct i965_driver_data *i965 = i965_driver_data(ctx);
5774     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777     struct wp_param * curbe_param = (struct wp_param *)param;
5778     struct object_surface *obj_surface;
5779     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780     VASurfaceID surface_id;
5781
5782     if (curbe_param->ref_list_idx) {
5783         surface_id = slice_param->RefPicList1[0].picture_id;
5784         obj_surface = SURFACE(surface_id);
5785         if (!obj_surface || !obj_surface->private_data)
5786             avc_state->weighted_ref_l1_enable = 0;
5787         else
5788             avc_state->weighted_ref_l1_enable = 1;
5789     } else {
5790         surface_id = slice_param->RefPicList0[0].picture_id;
5791         obj_surface = SURFACE(surface_id);
5792         if (!obj_surface || !obj_surface->private_data)
5793             avc_state->weighted_ref_l0_enable = 0;
5794         else
5795             avc_state->weighted_ref_l0_enable = 1;
5796     }
5797     if (!obj_surface)
5798         obj_surface = encode_state->reference_objects[0];
5799
5800
5801     i965_add_adv_gpe_surface(ctx, gpe_context,
5802                              obj_surface,
5803                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5804
5805     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806     i965_add_adv_gpe_surface(ctx, gpe_context,
5807                              obj_surface,
5808                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5809 }
5810
5811
5812 static VAStatus
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814                    struct encode_state *encode_state,
5815                    struct intel_encoder_context *encoder_context,
5816                    unsigned int list1_in_use)
5817 {
5818     struct i965_driver_data *i965 = i965_driver_data(ctx);
5819     struct i965_gpe_table *gpe = &i965->gpe_table;
5820     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5824
5825     struct i965_gpe_context *gpe_context;
5826     struct gpe_media_object_walker_parameter media_object_walker_param;
5827     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828     int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829     struct wp_param param;
5830
5831     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5832
5833     gpe->context_init(ctx, gpe_context);
5834     gpe->reset_binding_table(ctx, gpe_context);
5835
5836     memset(&param, 0, sizeof(param));
5837     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5838     /*set curbe*/
5839     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5840
5841     /*send surface*/
5842     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5843
5844     gpe->setup_interface_data(ctx, gpe_context);
5845
5846     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847     /* the scaling is based on 8x8 blk level */
5848     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850     kernel_walker_param.no_dependency = 1;
5851
5852     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5853
5854     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5855                                             gpe_context,
5856                                             media_function,
5857                                             &media_object_walker_param);
5858
5859     return VA_STATUS_SUCCESS;
5860 }
5861
5862
5863 /*
5864 sfd related function
5865 */
5866 static void
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868                        struct encode_state *encode_state,
5869                        struct i965_gpe_context *gpe_context,
5870                        struct intel_encoder_context *encoder_context,
5871                        void * param)
5872 {
5873     gen9_avc_sfd_curbe_data *cmd;
5874     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5878
5879     cmd = i965_gpe_context_map_curbe(gpe_context);
5880
5881     if (!cmd)
5882         return;
5883     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5884
5885     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887     cmd->dw0.stream_in_type = 7 ;
5888     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
5889     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890     cmd->dw0.vdenc_mode_disable = 1 ;
5891
5892     cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5895
5896     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
5898
5899     cmd->dw3.large_mv_threshold = 128 ;
5900     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901     cmd->dw5.zmv_threshold = 4 ;
5902     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903     cmd->dw7.min_dist_threshold = 10 ;
5904
5905     if (generic_state->frame_type == SLICE_TYPE_P) {
5906         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5907
5908     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5910     }
5911
5912     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
5914     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5919
5920     i965_gpe_context_unmap_curbe(gpe_context);
5921
5922 }
5923
5924 static void
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926                           struct encode_state *encode_state,
5927                           struct i965_gpe_context *gpe_context,
5928                           struct intel_encoder_context *encoder_context,
5929                           void * param)
5930 {
5931     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933     struct i965_gpe_resource *gpe_resource;
5934     int size = 0;
5935
5936     /*HME mv data surface memv output 4x*/
5937     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5939                                    gpe_resource,
5940                                    1,
5941                                    I965_SURFACEFORMAT_R8_UNORM,
5942                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5943
5944     /* memv distortion */
5945     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5947                                    gpe_resource,
5948                                    1,
5949                                    I965_SURFACEFORMAT_R8_UNORM,
5950                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5951     /*buffer output*/
5952     size = 32 * 4 * 4;
5953     gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954     i965_add_buffer_gpe_surface(ctx,
5955                                 gpe_context,
5956                                 gpe_resource,
5957                                 0,
5958                                 size / 4,
5959                                 0,
5960                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5961
5962 }
5963
5964 static VAStatus
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966                     struct encode_state *encode_state,
5967                     struct intel_encoder_context *encoder_context)
5968 {
5969     struct i965_driver_data *i965 = i965_driver_data(ctx);
5970     struct i965_gpe_table *gpe = &i965->gpe_table;
5971     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5974
5975     struct i965_gpe_context *gpe_context;
5976     struct gpe_media_object_parameter media_object_param;
5977     struct gpe_media_object_inline_data media_object_inline_data;
5978     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5980
5981     gpe->context_init(ctx, gpe_context);
5982     gpe->reset_binding_table(ctx, gpe_context);
5983
5984     /*set curbe*/
5985     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5986
5987     /*send surface*/
5988     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5989
5990     gpe->setup_interface_data(ctx, gpe_context);
5991
5992     memset(&media_object_param, 0, sizeof(media_object_param));
5993     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994     media_object_param.pinline_data = &media_object_inline_data;
5995     media_object_param.inline_size = sizeof(media_object_inline_data);
5996
5997     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5998                                      gpe_context,
5999                                      media_function,
6000                                      &media_object_param);
6001
6002     return VA_STATUS_SUCCESS;
6003 }
6004
/**************** PreEnc Scaling *************************************/
/* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
 * function to set preenc scaling curbe is the same one used for avc encode
 *      == gen95_avc_set_curbe_scaling4x()
 * function to send buffer/surface resources is the same one used for avc encode
 *      == gen9_avc_send_surface_scaling()
 */
/*
 * Run the 4x downscaling kernel for FEI PreEnc on one of three pictures
 * selected by scale_surface_type: the current input picture
 * (SCALE_CUR_PIC), the first past reference (SCALE_PAST_REF_PIC), or the
 * first future reference (SCALE_FUTURE_REF_PIC).  Optionally emits
 * per-MB statistics (variance / pixel average / flatness) into the
 * matching stat buffer when statistics output is enabled.
 *
 * NOTE(review): hme_type is currently unused here — only the 4x level
 * is run for PreEnc scaling.
 *
 * Returns VA_STATUS_SUCCESS on success.
 */
static VAStatus
gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int hme_type,
                               int scale_surface_type)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
    VAStatsStatisticsParameter *stat_param = NULL;
    struct i965_gpe_context *gpe_context;
    struct scaling_param surface_param;
    struct object_surface *obj_surface = NULL;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    int enable_statistics_output;

    stat_param_h264 = avc_state->stat_param;
    assert(stat_param_h264);
    stat_param = &stat_param_h264->stats_params;
    /* Statistics are on unless the app explicitly disabled them. */
    enable_statistics_output = !stat_param_h264->disable_statistics_output;

    /* PreEnc always uses the 4x scaling kernel. */
    memset(&surface_param, 0, sizeof(struct scaling_param));
    media_function = INTEL_MEDIA_STATE_4X_SCALING;
    kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
    downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
    downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;

    surface_param.input_frame_width = generic_state->frame_width_in_pixel;
    surface_param.input_frame_height = generic_state->frame_height_in_pixel;
    surface_param.output_frame_width = generic_state->frame_width_4x;
    surface_param.output_frame_height = generic_state->frame_height_4x;
    surface_param.use_4x_scaling  = 1 ;
    surface_param.use_16x_scaling = 0 ;
    surface_param.use_32x_scaling = 0 ;
    surface_param.enable_mb_flatness_check = enable_statistics_output;
    surface_param.enable_mb_variance_output = enable_statistics_output;
    surface_param.enable_mb_pixel_average_output = enable_statistics_output;
    surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;

    /* Pick input/output surfaces and the stat destination per picture. */
    switch (scale_surface_type) {

    case  SCALE_CUR_PIC:
        surface_param.input_surface = encode_state->input_yuv_object ;
        surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;

        if (enable_statistics_output) {
            surface_param.pres_mbv_proc_stat_buffer =
                &avc_ctx->preproc_stat_data_out_buffer;
            surface_param.mbv_proc_stat_enabled = 1;
        } else {
            surface_param.mbv_proc_stat_enabled = 0;
            surface_param.pres_mbv_proc_stat_buffer = NULL;
        }
        break;

    case SCALE_PAST_REF_PIC:
        obj_surface = SURFACE(stat_param->past_references[0].picture_id);
        assert(obj_surface);
        surface_param.input_surface = obj_surface;
        surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;

        /* Stats only when the app supplied a past-reference stat buffer. */
        if (stat_param->past_ref_stat_buf) {
            surface_param.pres_mbv_proc_stat_buffer =
                &avc_ctx->preenc_past_ref_stat_data_out_buffer;
            surface_param.mbv_proc_stat_enabled = 1;
        } else {
            surface_param.mbv_proc_stat_enabled = 0;
            surface_param.pres_mbv_proc_stat_buffer = NULL;
        }
        break;

    case SCALE_FUTURE_REF_PIC:

        obj_surface = SURFACE(stat_param->future_references[0].picture_id);
        assert(obj_surface);
        surface_param.input_surface = obj_surface;
        surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;

        /* Stats only when the app supplied a future-reference stat buffer. */
        if (stat_param->future_ref_stat_buf) {
            surface_param.pres_mbv_proc_stat_buffer =
                &avc_ctx->preenc_future_ref_stat_data_out_buffer;
            surface_param.mbv_proc_stat_enabled = 1;
        } else {
            surface_param.mbv_proc_stat_enabled = 0;
            surface_param.pres_mbv_proc_stat_buffer = NULL;
        }
        break;
    default :
        assert(0);
    }

    gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
    surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;

    /* No need of explicit flatness_check surface allocation. The field mb_is_flat
     * VAStatsStatisticsH264 will be used to store the output.  */
    surface_param.enable_mb_flatness_check = 0;
    generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);

    /* setup the interface data */
    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    /* the scaling is based on 8x8 blk level */
    kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
    kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
6146
/**************** PreEnc HME *************************************/
/* The kernel used to run PreEnc HME is the same one used in AVC encode:
 *       == gen9_avc_kernel_me()
 * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
 * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
 */
6153 static void
6154 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6155                              struct encode_state *encode_state,
6156                              struct i965_gpe_context *gpe_context,
6157                              struct intel_encoder_context *encoder_context,
6158                              void * param)
6159 {
6160     gen9_avc_fei_me_curbe_data *curbe_cmd;
6161     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6162     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6163     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6164     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6165     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6166
6167     struct me_param * curbe_param = (struct me_param *)param ;
6168     unsigned char  use_mv_from_prev_step = 0;
6169     unsigned char write_distortions = 0;
6170     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6171     unsigned char seach_table_idx = 0;
6172     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6173     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6174     unsigned int scale_factor = 0;
6175
6176     switch (curbe_param->hme_type) {
6177     case INTEL_ENC_HME_4x:
6178         use_mv_from_prev_step = 0;
6179         write_distortions = 0;
6180         mv_shift_factor = 2;
6181         scale_factor = 4;
6182         prev_mv_read_pos_factor = 0;
6183         break;
6184
6185     default:
6186         assert(0);
6187     }
6188
6189     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
6190     if (!curbe_cmd)
6191         return;
6192
6193     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6194     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6195
6196     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6197
6198     curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6199     if (avc_state->field_scaling_output_interleaved) {
6200         /*frame set to zero,field specified*/
6201         curbe_cmd->dw3.src_access = 0;
6202         curbe_cmd->dw3.ref_access = 0;
6203         curbe_cmd->dw7.src_field_polarity = 0;
6204     }
6205     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6206     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6207     curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6208
6209     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6210     curbe_cmd->dw6.write_distortions = write_distortions;
6211     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6212     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
6213
6214     if (generic_state->frame_type == SLICE_TYPE_B) {
6215         curbe_cmd->dw1.bi_weight = 32;
6216         curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6217         me_method = gen9_avc_b_me_method[generic_state->preset];
6218         seach_table_idx = 1;
6219     }
6220
6221     if (generic_state->frame_type == SLICE_TYPE_P ||
6222         generic_state->frame_type == SLICE_TYPE_B)
6223         curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6224
6225     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6226     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6227
6228     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
6229
6230     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6231     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6232     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6233     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6234     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6235     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6236     curbe_cmd->dw38.reserved = 0;
6237
6238     i965_gpe_context_unmap_curbe(gpe_context);
6239     return;
6240 }
6241
6242 static void
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244                                 struct encode_state *encode_state,
6245                                 struct i965_gpe_context *gpe_context,
6246                                 struct intel_encoder_context *encoder_context,
6247                                 void * param)
6248 {
6249     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254     struct object_surface *input_surface;
6255     struct i965_gpe_resource *gpe_resource;
6256     struct me_param * curbe_param = (struct me_param *)param ;
6257     int i = 0;
6258
6259     /* PreEnc Only supports 4xme */
6260     assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6261
6262     switch (curbe_param->hme_type) {
6263     case INTEL_ENC_HME_4x : {
6264         /*memv output 4x*/
6265         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6267                                        gpe_resource,
6268                                        1,
6269                                        I965_SURFACEFORMAT_R8_UNORM,
6270                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6271
6272         /* memv distortion output*/
6273         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6275                                        gpe_resource,
6276                                        1,
6277                                        I965_SURFACEFORMAT_R8_UNORM,
6278                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6279
6280         /* brc distortion  output*/
6281         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6283                                        gpe_resource,
6284                                        1,
6285                                        I965_SURFACEFORMAT_R8_UNORM,
6286                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6287
6288         /* input past ref scaled YUV surface*/
6289         for (i = 0; i < stat_param->num_past_references; i++) {
6290             /*input current down scaled YUV surface for forward refef */
6291             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292             i965_add_adv_gpe_surface(ctx, gpe_context,
6293                                      input_surface,
6294                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6295
6296             input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297             i965_add_adv_gpe_surface(ctx, gpe_context,
6298                                      input_surface,
6299                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6300         }
6301
6302         /* input future ref scaled YUV surface*/
6303         for (i = 0; i < stat_param->num_future_references; i++) {
6304             /*input current down scaled YUV surface for backward ref */
6305             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306             i965_add_adv_gpe_surface(ctx, gpe_context,
6307                                      input_surface,
6308                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6309
6310             input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311             i965_add_adv_gpe_surface(ctx, gpe_context,
6312                                      input_surface,
6313                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6314         }
6315         break;
6316
6317     }
6318     default:
6319         break;
6320
6321     }
6322 }
6323
6324 /**************** PreEnc PreProc *************************************/
6325 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6326  * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6327  * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
6328  */
6329 static void
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331                                   struct encode_state *encode_state,
6332                                   struct i965_gpe_context *gpe_context,
6333                                   struct intel_encoder_context *encoder_context,
6334                                   void * param)
6335 {
6336     gen9_avc_preproc_curbe_data *cmd;
6337     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342     unsigned char me_method = 0;
6343     unsigned int table_idx = 0;
6344     int ref_width, ref_height, len_sp;
6345     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347     unsigned int preset = generic_state->preset;
6348
6349     cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6350     if (!cmd)
6351         return;
6352     memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6353
6354     switch (generic_state->frame_type) {
6355     case SLICE_TYPE_I:
6356         memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357                sizeof(gen9_avc_preproc_curbe_data));
6358         break;
6359     case SLICE_TYPE_P:
6360         memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361                sizeof(gen9_avc_preproc_curbe_data));
6362         break;
6363     case SLICE_TYPE_B:
6364         memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365                sizeof(gen9_avc_preproc_curbe_data));
6366         break;
6367     default:
6368         assert(0);
6369     }
6370     /* 4 means full search, 6 means diamand search */
6371     me_method  = (stat_param_h264->search_window == 5) ||
6372                  (stat_param_h264->search_window == 8) ? 4 : 6;
6373
6374     ref_width    = stat_param_h264->ref_width;
6375     ref_height   = stat_param_h264->ref_height;
6376     len_sp       = stat_param_h264->len_sp;
6377     /* If there is a serch_window, discard user provided ref_width, ref_height
6378      * and search_path length */
6379     switch (stat_param_h264->search_window) {
6380     case 0:
6381         /*  not use predefined search window, there should be a search_path input */
6382         if ((stat_param_h264->search_path != 0) &&
6383             (stat_param_h264->search_path != 1) &&
6384             (stat_param_h264->search_path != 2)) {
6385             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
6386             assert(0);
6387         }
6388         /* 4 means full search, 6 means diamand search */
6389         me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391             WARN_ONCE("Invalid input ref_width/ref_height in"
6392                       "SearchWindow=0 case! \n");
6393             assert(0);
6394         }
6395         break;
6396
6397     case 1:
6398         /* Tiny - 4 SUs 24x24 window */
6399         ref_width  = 24;
6400         ref_height = 24;
6401         len_sp     = 4;
6402         break;
6403
6404     case 2:
6405         /* Small - 9 SUs 28x28 window */
6406         ref_width  = 28;
6407         ref_height = 28;
6408         len_sp     = 9;
6409         break;
6410     case 3:
6411         /* Diamond - 16 SUs 48x40 window */
6412         ref_width  = 48;
6413         ref_height = 40;
6414         len_sp     = 16;
6415         break;
6416     case 4:
6417         /* Large Diamond - 32 SUs 48x40 window */
6418         ref_width  = 48;
6419         ref_height = 40;
6420         len_sp     = 32;
6421         break;
6422     case 5:
6423         /* Exhaustive - 48 SUs 48x40 window */
6424         ref_width  = 48;
6425         ref_height = 40;
6426         len_sp     = 48;
6427         break;
6428     case 6:
6429         /* Diamond - 16 SUs 64x32 window */
6430         ref_width  = 64;
6431         ref_height = 32;
6432         len_sp     = 16;
6433         break;
6434     case 7:
6435         /* Large Diamond - 32 SUs 64x32 window */
6436         ref_width  = 64;
6437         ref_height = 32;
6438         len_sp     = 32;
6439         break;
6440     case 8:
6441         /* Exhaustive - 48 SUs 64x32 window */
6442         ref_width  = 64;
6443         ref_height = 32;
6444         len_sp     = 48;
6445         break;
6446
6447     default:
6448         assert(0);
6449     }
6450
6451     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6452     if (is_bframe) {
6453         CLIP(ref_width, 4, 32);
6454         CLIP(ref_height, 4, 32);
6455     } else if (is_pframe) {
6456         CLIP(ref_width, 4, 64);
6457         CLIP(ref_height, 4, 32);
6458     }
6459
6460     cmd->dw0.adaptive_enable =
6461         cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462     cmd->dw2.max_len_sp = len_sp;
6463     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465     cmd->dw3.src_access =
6466         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6467
6468     if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469         cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6470     else
6471         cmd->dw3.ft_enable = 0;
6472
6473     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474     cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475     cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476     cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477     cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478     cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479     cmd->dw4.hme_enable = generic_state->hme_enabled;
6480     cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481     cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6482
6483     cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484         (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6485
6486     cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487     cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6488
6489     cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490     cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6491
6492     cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6493
6494     /* mv mode cost */
6495     memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6496
6497     /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498     memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6499
6500     /* search path tables */
6501     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6503
6504     if (stat_param_h264->intra_part_mask  == 0x07)
6505         cmd->dw31.intra_compute_type  = 3;
6506
6507     cmd->dw38.ref_threshold = 400;
6508     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6509
6510     if (generic_state->frame_type == SLICE_TYPE_I) {
6511         cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512         cmd->dw36.hme_combine_overlap = 0;
6513     } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515         cmd->dw3.bme_disable_fbr = 1;
6516         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518         cmd->dw7.non_skip_zmv_added = 1;
6519         cmd->dw7.non_skip_mode_added = 1;
6520         cmd->dw7.skip_center_mask = 1;
6521         cmd->dw32.max_vmv_r =
6522             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523         cmd->dw36.hme_combine_overlap = 1;
6524
6525     } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6526
6527         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528         cmd->dw3.search_ctrl = 0;
6529         cmd->dw3.skip_type = 1;
6530         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532         cmd->dw7.skip_center_mask = 0xff;
6533         cmd->dw32.max_vmv_r =
6534             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535         cmd->dw36.hme_combine_overlap = 1;
6536     }
6537
6538     cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539     cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540     cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541     cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542     cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543     cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544     cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545     cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546     cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6547
6548     i965_gpe_context_unmap_curbe(gpe_context);
6549 }
6550
6551 static void
6552 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6553                                      struct encode_state *encode_state,
6554                                      struct i965_gpe_context *gpe_context,
6555                                      struct intel_encoder_context *encoder_context,
6556                                      void * param)
6557 {
6558     struct i965_driver_data *i965 = i965_driver_data(ctx);
6559     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6560     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6561     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6562     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6563     struct object_surface *obj_surface;
6564     struct i965_gpe_resource *gpe_resource;
6565     VASurfaceID surface_id;
6566     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6567     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6568     unsigned int size = 0, frame_mb_nums = 0;
6569
6570     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6571
6572     /* input yuv surface, Y index */
6573     obj_surface = encode_state->input_yuv_object;
6574     i965_add_2d_gpe_surface(ctx,
6575                             gpe_context,
6576                             obj_surface,
6577                             0,
6578                             1,
6579                             I965_SURFACEFORMAT_R8_UNORM,
6580                             GEN9_AVC_PREPROC_CURR_Y_INDEX);
6581
6582     /* input yuv surface, UV index */
6583     i965_add_2d_gpe_surface(ctx,
6584                             gpe_context,
6585                             obj_surface,
6586                             1,
6587                             1,
6588                             I965_SURFACEFORMAT_R16_UINT,
6589                             GEN9_AVC_MBENC_CURR_UV_INDEX);
6590
6591
6592     if (generic_state->hme_enabled) {
6593         /* HME mv data buffer */
6594         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6595         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6596                                        gpe_resource,
6597                                        1,
6598                                        I965_SURFACEFORMAT_R8_UNORM,
6599                                        GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6600     }
6601
6602     /* mv predictor buffer */
6603     if (stat_param_h264->mv_predictor_ctrl) {
6604         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6605         gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6606         i965_add_buffer_gpe_surface(ctx,
6607                                     gpe_context,
6608                                     gpe_resource,
6609                                     0,
6610                                     size / 4,
6611                                     0,
6612                                     GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
6613     }
6614
6615     /* MB qp buffer */
6616     if (stat_param_h264->mb_qp) {
6617         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6618         gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6619         i965_add_buffer_gpe_surface(ctx,
6620                                     gpe_context,
6621                                     gpe_resource,
6622                                     0,
6623                                     size / 4,
6624                                     0,
6625                                     GEN9_AVC_PREPROC_MBQP_INDEX);
6626
6627         gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6628         size = 16 * AVC_QP_MAX * 4;
6629         i965_add_buffer_gpe_surface(ctx,
6630                                     gpe_context,
6631                                     gpe_resource,
6632                                     0,
6633                                     size / 4,
6634                                     0,
6635                                     GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6636
6637     }
6638
6639     /* mv data output buffer */
6640     if (!stat_param_h264->disable_mv_output) {
6641         gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6642         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6643         i965_add_buffer_gpe_surface(ctx,
6644                                     gpe_context,
6645                                     gpe_resource,
6646                                     0,
6647                                     size / 4,
6648                                     0,
6649                                     GEN9_AVC_PREPROC_MV_DATA_INDEX);
6650     }
6651
6652     /* statistics output buffer */
6653     if (!stat_param_h264->disable_statistics_output) {
6654         gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6655         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6656         i965_add_buffer_gpe_surface(ctx,
6657                                     gpe_context,
6658                                     gpe_resource,
6659                                     0,
6660                                     size / 4,
6661                                     0,
6662                                     GEN9_AVC_PREPROC_MB_STATS_INDEX);
6663     }
6664
6665     /* vme cur pic y */
6666     obj_surface = encode_state->input_yuv_object;
6667     i965_add_2d_gpe_surface(ctx,
6668                             gpe_context,
6669                             obj_surface,
6670                             0,
6671                             1,
6672                             I965_SURFACEFORMAT_R8_UNORM,
6673                             GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6674
6675     /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
6676     obj_surface = encode_state->input_yuv_object;
6677     i965_add_2d_gpe_surface(ctx,
6678                             gpe_context,
6679                             obj_surface,
6680                             0,
6681                             1,
6682                             I965_SURFACEFORMAT_R8_UNORM,
6683                             GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6684
6685     /* vme forward ref */
6686     /* Only supports one past ref */
6687     if (stat_param->num_past_references > 0) {
6688         surface_id = stat_param->past_references[0].picture_id;
6689         assert(surface_id != VA_INVALID_ID);
6690         obj_surface = SURFACE(surface_id);
6691         if (!obj_surface)
6692             return;
6693         i965_add_adv_gpe_surface(ctx, gpe_context,
6694                                  obj_surface,
6695                                  GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6696
6697     }
6698
6699     /* vme future ref */
6700     /* Only supports one future ref */
6701     if (stat_param->num_future_references > 0) {
6702         surface_id = stat_param->future_references[0].picture_id;
6703         assert(surface_id != VA_INVALID_ID);
6704         obj_surface = SURFACE(surface_id);
6705         if (!obj_surface)
6706             return;
6707         i965_add_adv_gpe_surface(ctx, gpe_context,
6708                                  obj_surface,
6709                                  GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
6710
6711         surface_id = stat_param->future_references[0].picture_id;
6712         assert(surface_id != VA_INVALID_ID);
6713         obj_surface = SURFACE(surface_id);
6714         if (!obj_surface)
6715             return;
6716         i965_add_adv_gpe_surface(ctx, gpe_context,
6717                                  obj_surface,
6718                                  GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
6719     }
6720
6721     return;
6722
6723 }
6724
6725 static VAStatus
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727                                struct encode_state *encode_state,
6728                                struct intel_encoder_context *encoder_context)
6729 {
6730     struct i965_driver_data *i965 = i965_driver_data(ctx);
6731     struct i965_gpe_table *gpe = &i965->gpe_table;
6732     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738     struct i965_gpe_context *gpe_context;
6739     struct gpe_media_object_walker_parameter media_object_walker_param;
6740     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741     int media_function = INTEL_MEDIA_STATE_PREPROC;
6742     struct i965_gpe_resource *gpe_resource = NULL;
6743     unsigned int * data = NULL;
6744     unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
6745
6746     gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747     gpe->context_init(ctx, gpe_context);
6748     gpe->reset_binding_table(ctx, gpe_context);
6749
6750     /*set curbe*/
6751     generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6752
6753     /*send surface*/
6754     generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6755
6756     gpe->setup_interface_data(ctx, gpe_context);
6757
6758     /*  Set up FtqLut Buffer if there is QP change within a frame */
6759     if (stat_param_h264->mb_qp) {
6760         gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761         assert(gpe_resource);
6762         data = i965_map_gpe_resource(gpe_resource);
6763         assert(data);
6764         memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
6765     }
6766
6767     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770     kernel_walker_param.no_dependency = 1;
6771
6772     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6773
6774     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6775                                             gpe_context,
6776                                             media_function,
6777                                             &media_object_walker_param);
6778
6779     return VA_STATUS_SUCCESS;
6780 }
6781
6782
/*
 * Fill the MbEnc kernel CURBE (constant buffer) for Gen8.
 *
 * The CURBE is seeded from one of the per-slice-type init tables
 * (I/P/B, or the I-frame-distortion variant) and then patched with
 * per-frame state: QP, motion-search parameters, FTQ/skip controls,
 * HME settings, multi-reference predictor control, rolling intra
 * refresh, ROI regions, and finally the binding-table indices for
 * every surface the kernel reads or writes.
 *
 * @param ctx             driver context
 * @param encode_state    current encode state (unused here)
 * @param gpe_context     GPE context whose CURBE is mapped and filled
 * @param encoder_context encoder context holding the VME state
 * @param param           actually a struct mbenc_param * for this run
 *
 * Returns nothing; on failure (unmappable CURBE, non-Gen8 device,
 * missing backward reference for a B frame) it simply returns.
 */
static void
gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    gen8_avc_mbenc_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VASurfaceID surface_id;
    struct object_surface *obj_surface;

    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
    unsigned char qp = 0;
    unsigned char me_method = 0;
    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
    unsigned int table_idx = 0;
    unsigned int curbe_size = 0;

    unsigned int preset = generic_state->preset;
    /* Map the CURBE and seed it from the static init table that matches
     * the current slice type (or the I-frame-distortion variant). */
    if (IS_GEN8(i965->intel.device_info)) {
        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
        if (!cmd)
            return;
        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
        memset(cmd, 0, curbe_size);

        if (mbenc_i_frame_dist_in_use) {
            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
        } else {
            switch (generic_state->frame_type) {
            case SLICE_TYPE_I:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_P:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_B:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
                break;
            default:
                assert(0);
            }
        }
    } else {
        /* This Gen8 CURBE layout is only valid on Gen8 hardware. */
        assert(0);

        return;
    }

    /* Search method is preset-driven; B frames use the B-specific table. */
    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;

    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
    cmd->dw38.max_len_sp = 0;

    /* Frame encoding only: src/ref access left at frame polarity. */
    cmd->dw3.src_access = 0;
    cmd->dw3.ref_access = 0;

    /* FTQ (fast transform quantization) only applies to inter frames;
     * without an override the enable bits come from the preset table
     * (bit 0 for P, bit 1 for B). */
    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
        //disable ftq_override by now.
        if (avc_state->ftq_override) {
            cmd->dw3.ftq_enable = avc_state->ftq_enable;

        } else {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;

            } else {
                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
            }
        }
    } else {
        cmd->dw3.ftq_enable = 0;
    }

    if (avc_state->disable_sub_mb_partion)
        cmd->dw3.sub_mb_part_mask = 0x7;

    /* Picture geometry: the I-frame-distortion pass runs on the 4x
     * downscaled surface, the normal pass on full resolution. */
    if (mbenc_i_frame_dist_in_use) {
        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
        cmd->dw6.batch_buffer_end = 0;
        cmd->dw31.intra_compute_type = 1;
    } else {
        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;

        {
            /* Load mode/MV cost table for this slice type and QP. */
            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
                /* NOTE(review): empty branch — the gen9 path loads an
                 * old-mode cost table here; confirm the omission is
                 * intentional for gen8. */
            } else if (avc_state->skip_bias_adjustment_enable) {
                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
                // No need to check for P picture as the flag is only enabled for P picture */
                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
            }
        }
        /* Search path table: index 1 for B frames, 0 otherwise. */
        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
    }
    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
    cmd->dw4.field_parity_flag = 0;//bottom field
    cmd->dw4.enable_cur_fld_idr = 0;//field realted
    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    cmd->dw4.hme_enable = generic_state->hme_enabled;
    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);

    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
    cmd->dw7.src_field_polarity = 0;//field related

    /*ftq_skip_threshold_lut set,dw14 /15*/

    /*r5 disable NonFTQSkipThresholdLUT*/
    if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    }

    cmd->dw13.qp_prime_y = qp;
    cmd->dw13.qp_prime_cb = qp;
    cmd->dw13.qp_prime_cr = qp;
    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable

    /* Multi-reference prediction control: 128 disables a list, 1 enables
     * it; the preset selects which lists may use multiple predictors. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
        switch (gen9_avc_multi_pred[preset]) {
        case 0:
            cmd->dw32.mult_pred_l0_disable = 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 1:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 2:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        case 3:
            cmd->dw32.mult_pred_l0_disable = 1;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        }

    } else {
        cmd->dw32.mult_pred_l0_disable = 128;
        cmd->dw32.mult_pred_l1_disable = 128;
    }

    if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
        /* NOTE(review): same field assigned twice; the second write was
         * probably meant for list1_ref_id1_frm_field_parity — confirm
         * against the gen9 variant of this function. */
        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
    }

    cmd->dw34.b_original_bff = 0; //frame only
    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;

    if (cmd->dw34.force_non_skip_check) {
        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
    }

    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
    cmd->dw38.ref_threshold = 400;
    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;

    /* The I-frame-distortion pass zeroes QP and the intra non-DC
     * penalties so only distortion is measured. */
    if (mbenc_i_frame_dist_in_use) {
        cmd->dw13.qp_prime_y = 0;
        cmd->dw13.qp_prime_cb = 0;
        cmd->dw13.qp_prime_cr = 0;
        cmd->dw33.intra_16x16_nondc_penalty = 0;
        cmd->dw33.intra_8x8_nondc_penalty = 0;
        cmd->dw33.intra_4x4_nondc_penalty = 0;
    }
    /* Per-reference actual QP values (list0 refs 0-7, list1 refs 0-1). */
    if (cmd->dw4.use_actual_ref_qp_value) {
        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
    }

    table_idx = slice_type_kernel[generic_state->frame_type];
    cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];

    /* Slice-type specific motion search / bias settings. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        cmd->dw0.skip_mode_enable = 0;
        cmd->dw37.skip_mode_enable = 0;
        cmd->dw36.hme_combine_overlap = 0;
        cmd->dw47.intra_cost_sf = 16;
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = 0;

    } else if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw3.bme_disable_fbr = 1;
        cmd->dw5.ref_width = gen9_avc_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_search_y[preset];
        cmd->dw7.non_skip_zmv_added = 1;
        cmd->dw7.non_skip_mode_added = 1;
        cmd->dw7.skip_center_mask = 1;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
        cmd->dw36.hme_combine_overlap = 1;
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_search_y[preset];
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    } else {
        /* B slice: needs a valid backward (list1) reference. */
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw1.bi_weight = avc_state->bi_weight;
        cmd->dw3.search_ctrl = 7;
        cmd->dw3.skip_type = 1;
        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw7.skip_center_mask = 0xff;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
        cmd->dw36.hme_combine_overlap = 1;
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface) {
            /* CURBE stays mapped here; the early return mirrors the
             * other error paths in this file. */
            WARN_ONCE("Invalid backward reference frame\n");
            return;
        }
        cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
        if (cmd->dw34.enable_direct_bias_adjustment) {
            cmd->dw7.non_skip_zmv_added = 1;
            cmd->dw7.non_skip_mode_added = 1;
        }

        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    }
    /* Propagate the (possibly table-initialized) skip mode back into
     * the driver state so later stages agree with the kernel. */
    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;

    if (avc_state->rolling_intra_refresh_enable) {
        /*by now disable it*/
        if (generic_state->brc_enabled) {
            cmd->dw4.enable_intra_refresh = false;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
            cmd->dw48.widi_intra_refresh_mbx = 0;
            cmd->dw58.widi_intra_refresh_mby = 0;
        } else {
            cmd->dw4.enable_intra_refresh = true;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
        }
        cmd->dw32.mult_pred_l0_disable = 128;
        /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
         across one P frame to another P frame, as needed by the RollingI algo */
        cmd->dw48.widi_intra_refresh_mbx = 0;
        cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
        cmd->dw48.widi_intra_refresh_qp_delta = 0;

    } else {
        cmd->dw34.widi_intra_refresh_en = 0;
    }

    /*roi set disable by now. 49-56*/
    if (curbe_param->roi_enabled) {
        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;

        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;

        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;

        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;

        cmd->dw36.enable_cabac_work_around = 0;

        /* With BRC off, the ROI delta-QP values are clamped so the final
         * QP stays within [0, AVC_QP_MAX]; with BRC on, ROI via delta-QP
         * is not supported and the flag is cleared instead. */
        if (!generic_state->brc_enabled) {
            char tmp = 0;
            tmp = generic_state->roi[0].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_1_dqp_prime_y = tmp;
            tmp = generic_state->roi[1].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_2_dqp_prime_y = tmp;
            tmp = generic_state->roi[2].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_3_dqp_prime_y = tmp;
            tmp = generic_state->roi[3].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_4_dqp_prime_y = tmp;
        } else {
            cmd->dw34.roi_enable_flag = 0;
        }
    }

    /* Binding-table indices for every surface the MbEnc kernel uses. */
    cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
    cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
    cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
    cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
    cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
    cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
    cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
    cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
    cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
    cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
    cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
    cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
    cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
    cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
    cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
    cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
    cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
    cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
    cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
    cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
    cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
    cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
7152
7153 static void
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155                              struct encode_state *encode_state,
7156                              struct i965_gpe_context *gpe_context,
7157                              struct intel_encoder_context *encoder_context,
7158                              void *param)
7159 {
7160     gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161     struct scaling_param *surface_param = (struct scaling_param *)param;
7162
7163     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7164
7165     if (!curbe_cmd)
7166         return;
7167
7168     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7169
7170     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
7171     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7172
7173     curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174     curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7175
7176     curbe_cmd->dw5.flatness_threshold = 0;
7177     if (surface_param->enable_mb_flatness_check) {
7178         curbe_cmd->dw5.flatness_threshold = 128;
7179         curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7180     }
7181
7182     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7185
7186     if (curbe_cmd->dw6.enable_mb_variance_output ||
7187         curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188         curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189         curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7190     }
7191
7192     i965_gpe_context_unmap_curbe(gpe_context);
7193     return;
7194 }
7195
7196 static void
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198                       struct encode_state *encode_state,
7199                       struct i965_gpe_context *gpe_context,
7200                       struct intel_encoder_context *encoder_context,
7201                       void * param)
7202 {
7203     gen8_avc_me_curbe_data *curbe_cmd;
7204     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7207
7208     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7209
7210     struct me_param * curbe_param = (struct me_param *)param ;
7211     unsigned char  use_mv_from_prev_step = 0;
7212     unsigned char write_distortions = 0;
7213     unsigned char qp_prime_y = 0;
7214     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215     unsigned char seach_table_idx = 0;
7216     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218     unsigned int scale_factor = 0;
7219
7220     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
7221     switch (curbe_param->hme_type) {
7222     case INTEL_ENC_HME_4x : {
7223         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224         write_distortions = 1;
7225         mv_shift_factor = 2;
7226         scale_factor = 4;
7227         prev_mv_read_pos_factor = 0;
7228         break;
7229     }
7230     case INTEL_ENC_HME_16x : {
7231         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232         write_distortions = 0;
7233         mv_shift_factor = 2;
7234         scale_factor = 16;
7235         prev_mv_read_pos_factor = 1;
7236         break;
7237     }
7238     case INTEL_ENC_HME_32x : {
7239         use_mv_from_prev_step = 0;
7240         write_distortions = 0;
7241         mv_shift_factor = 1;
7242         scale_factor = 32;
7243         prev_mv_read_pos_factor = 0;
7244         break;
7245     }
7246     default:
7247         assert(0);
7248
7249     }
7250     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7251
7252     if (!curbe_cmd)
7253         return;
7254
7255     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7257
7258     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7259
7260     curbe_cmd->dw3.sub_pel_mode = 3;
7261     if (avc_state->field_scaling_output_interleaved) {
7262         /*frame set to zero,field specified*/
7263         curbe_cmd->dw3.src_access = 0;
7264         curbe_cmd->dw3.ref_access = 0;
7265         curbe_cmd->dw7.src_field_polarity = 0;
7266     }
7267     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7270
7271     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272     curbe_cmd->dw6.write_distortions = write_distortions;
7273     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7275
7276     if (generic_state->frame_type == SLICE_TYPE_B) {
7277         curbe_cmd->dw1.bi_weight = 32;
7278         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279         me_method = gen9_avc_b_me_method[generic_state->preset];
7280         seach_table_idx = 1;
7281     }
7282
7283     if (generic_state->frame_type == SLICE_TYPE_P ||
7284         generic_state->frame_type == SLICE_TYPE_B)
7285         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7286
7287     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
7289
7290     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
7291
7292     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298     curbe_cmd->dw38.reserved = 0;
7299
7300     i965_gpe_context_unmap_curbe(gpe_context);
7301     return;
7302 }
7303
/*
 * Fill the per-frame BRC-update kernel CURBE for Gen8.
 *
 * Starts from the static init table and patches in the running rate
 * control state: target buffer fullness (with overflow handling),
 * skipped-frame accounting, per-slice-type min/max QP, and the AVBR
 * growth/ratio thresholds.
 *
 * Side effects: advances generic_state->brc_init_current_target_buf_full_in_bits
 * by one frame's worth of input bits (plus skipped frames), i.e. this
 * function both reads and updates the BRC running state.
 *
 * @param param unused; all inputs come from the encoder contexts.
 */
static void
gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen8_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));

    /* Detect and wrap target-buffer overflow before programming the
     * target size; the flag tells the kernel an overflow occurred. */
    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Account for frames skipped by the app: report them to the kernel
     * and credit their bit budget back to the buffer model. */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header bytes already inserted, converted to bits (<< 3). */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Min/max QP clamps per slice type; 0/0 means "no clamp". */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }

    /* Advance the buffer model by one frame's input bit budget. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR only: growth-adjustment frame thresholds scaled by the
     * convergence setting, and rate-ratio thresholds widened/narrowed
     * by the accuracy setting. */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }

    /* NOTE(review): common_param is filled below but never read in this
     * function — the gen9 counterpart passes it to a profile/level
     * helper; confirm whether that call was dropped intentionally. */
    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
7408
7409 /*
7410 kernel related function:init/destroy etc
7411 */
7412 static void
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414                              struct generic_encoder_context *generic_context,
7415                              struct gen_avc_scaling_context *kernel_context,
7416                              int preenc_enabled)
7417 {
7418     struct i965_driver_data *i965 = i965_driver_data(ctx);
7419     struct i965_gpe_table *gpe = &i965->gpe_table;
7420     struct i965_gpe_context *gpe_context = NULL;
7421     struct encoder_kernel_parameter kernel_param ;
7422     struct encoder_scoreboard_parameter scoreboard_param;
7423     struct i965_kernel common_kernel;
7424
7425     memset(&kernel_param, 0, sizeof(kernel_param));
7426     if (IS_SKL(i965->intel.device_info) ||
7427         IS_BXT(i965->intel.device_info)) {
7428         if (!preenc_enabled) {
7429             kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430             kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7431         } else {
7432             /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433             kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434             kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7435         }
7436     } else if (IS_KBL(i965->intel.device_info) ||
7437                IS_GEN10(i965->intel.device_info) ||
7438                IS_GLK(i965->intel.device_info)) {
7439         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441     } else if (IS_GEN8(i965->intel.device_info)) {
7442         kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443         kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7444     } else
7445         assert(0);
7446
7447     /* 4x scaling kernel*/
7448     kernel_param.sampler_size = 0;
7449
7450     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451     scoreboard_param.mask = 0xFF;
7452     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454     scoreboard_param.walkpat_flag = 0;
7455
7456     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7459
7460     memset(&common_kernel, 0, sizeof(common_kernel));
7461
7462     generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463                                                 generic_context->enc_kernel_size,
7464                                                 INTEL_GENERIC_ENC_SCALING4X,
7465                                                 0,
7466                                                 &common_kernel);
7467
7468     gpe->load_kernels(ctx,
7469                       gpe_context,
7470                       &common_kernel,
7471                       1);
7472
7473     /* PreEnc using only the 4X scaling */
7474     if (preenc_enabled)
7475         return;
7476
7477     /*2x scaling kernel*/
7478     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479     kernel_param.inline_data_size = 0;
7480     kernel_param.sampler_size = 0;
7481
7482     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7485
7486     memset(&common_kernel, 0, sizeof(common_kernel));
7487
7488     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489                                          generic_context->enc_kernel_size,
7490                                          INTEL_GENERIC_ENC_SCALING2X,
7491                                          0,
7492                                          &common_kernel);
7493
7494     gpe->load_kernels(ctx,
7495                       gpe_context,
7496                       &common_kernel,
7497                       1);
7498
7499 }
7500
7501 static void
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503                         struct generic_encoder_context *generic_context,
7504                         struct gen_avc_me_context *kernel_context,
7505                         int preenc_enabled)
7506 {
7507     struct i965_driver_data *i965 = i965_driver_data(ctx);
7508     struct i965_gpe_table *gpe = &i965->gpe_table;
7509     struct i965_gpe_context *gpe_context = NULL;
7510     struct encoder_kernel_parameter kernel_param ;
7511     struct encoder_scoreboard_parameter scoreboard_param;
7512     struct i965_kernel common_kernel;
7513     int i = 0;
7514     unsigned int curbe_size = 0;
7515
7516     if (IS_GEN8(i965->intel.device_info)) {
7517         curbe_size = sizeof(gen8_avc_me_curbe_data);
7518     } else {
7519         if (!preenc_enabled)
7520             curbe_size = sizeof(gen9_avc_me_curbe_data);
7521         else
7522             curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7523     }
7524
7525     kernel_param.curbe_size = curbe_size;
7526     kernel_param.inline_data_size = 0;
7527     kernel_param.sampler_size = 0;
7528
7529     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530     scoreboard_param.mask = 0xFF;
7531     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533     scoreboard_param.walkpat_flag = 0;
7534
7535     /* There is two hme kernel, one for P and other for B frame */
7536     for (i = 0; i < 2; i++) {
7537         gpe_context = &kernel_context->gpe_contexts[i];
7538         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7540
7541         memset(&common_kernel, 0, sizeof(common_kernel));
7542
7543         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544                                                     generic_context->enc_kernel_size,
7545                                                     INTEL_GENERIC_ENC_ME,
7546                                                     i,
7547                                                     &common_kernel);
7548
7549         gpe->load_kernels(ctx,
7550                           gpe_context,
7551                           &common_kernel,
7552                           1);
7553     }
7554
7555 }
7556
7557 static void
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559                              struct generic_encoder_context *generic_context,
7560                              struct gen_avc_preproc_context *kernel_context)
7561 {
7562     struct i965_driver_data *i965 = i965_driver_data(ctx);
7563     struct i965_gpe_table *gpe = &i965->gpe_table;
7564     struct i965_gpe_context *gpe_context = NULL;
7565     struct encoder_kernel_parameter kernel_param ;
7566     struct encoder_scoreboard_parameter scoreboard_param;
7567     struct i965_kernel common_kernel;
7568
7569     kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570     kernel_param.inline_data_size = 0;
7571     kernel_param.sampler_size = 0;
7572
7573     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574     scoreboard_param.mask = 0xFF;
7575     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577     scoreboard_param.walkpat_flag = 0;
7578
7579     gpe_context = &kernel_context->gpe_contexts;
7580     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7582
7583     memset(&common_kernel, 0, sizeof(common_kernel));
7584
7585     intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586                                              generic_context->enc_kernel_size,
7587                                              INTEL_GENERIC_ENC_PREPROC,
7588                                              0,
7589                                              &common_kernel);
7590
7591     gpe->load_kernels(ctx,
7592                       gpe_context,
7593                       &common_kernel,
7594                       1);
7595
7596 }
7597
7598 static void
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600                            struct generic_encoder_context *generic_context,
7601                            struct gen_avc_mbenc_context *kernel_context,
7602                            int fei_enabled)
7603 {
7604     struct i965_driver_data *i965 = i965_driver_data(ctx);
7605     struct i965_gpe_table *gpe = &i965->gpe_table;
7606     struct i965_gpe_context *gpe_context = NULL;
7607     struct encoder_kernel_parameter kernel_param ;
7608     struct encoder_scoreboard_parameter scoreboard_param;
7609     struct i965_kernel common_kernel;
7610     int i = 0;
7611     unsigned int curbe_size = 0;
7612     unsigned int num_mbenc_kernels = 0;
7613
7614     if (IS_SKL(i965->intel.device_info) ||
7615         IS_BXT(i965->intel.device_info)) {
7616         if (!fei_enabled) {
7617             curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618             num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7619         } else {
7620             curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621             num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7622         }
7623     } else if (IS_KBL(i965->intel.device_info) ||
7624                IS_GEN10(i965->intel.device_info) ||
7625                IS_GLK(i965->intel.device_info)) {
7626         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628     } else if (IS_GEN8(i965->intel.device_info)) {
7629         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7631     }
7632
7633     assert(curbe_size > 0);
7634     kernel_param.curbe_size = curbe_size;
7635     kernel_param.inline_data_size = 0;
7636     kernel_param.sampler_size = 0;
7637
7638     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639     scoreboard_param.mask = 0xFF;
7640     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642     scoreboard_param.walkpat_flag = 0;
7643
7644     for (i = 0; i < num_mbenc_kernels ; i++) {
7645         gpe_context = &kernel_context->gpe_contexts[i];
7646         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7648
7649         memset(&common_kernel, 0, sizeof(common_kernel));
7650
7651         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652                                                     generic_context->enc_kernel_size,
7653                                                     INTEL_GENERIC_ENC_MBENC,
7654                                                     i,
7655                                                     &common_kernel);
7656
7657         gpe->load_kernels(ctx,
7658                           gpe_context,
7659                           &common_kernel,
7660                           1);
7661     }
7662
7663 }
7664
7665 static void
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667                          struct generic_encoder_context *generic_context,
7668                          struct gen_avc_brc_context *kernel_context)
7669 {
7670     struct i965_driver_data *i965 = i965_driver_data(ctx);
7671     struct i965_gpe_table *gpe = &i965->gpe_table;
7672     struct i965_gpe_context *gpe_context = NULL;
7673     struct encoder_kernel_parameter kernel_param ;
7674     struct encoder_scoreboard_parameter scoreboard_param;
7675     struct i965_kernel common_kernel;
7676     int num_brc_init_kernels = 0;
7677     int i = 0;
7678
7679     if (IS_GEN8(i965->intel.device_info)) {
7680         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7681     } else {
7682         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
7683     }
7684
7685     const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687         (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689         (sizeof(gen8_avc_mbenc_curbe_data)),
7690         0,
7691     };
7692     const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7697         0,
7698         (sizeof(gen9_avc_mb_brc_curbe_data))
7699     };
7700
7701     kernel_param.inline_data_size = 0;
7702     kernel_param.sampler_size = 0;
7703
7704     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705     scoreboard_param.mask = 0xFF;
7706     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708     scoreboard_param.walkpat_flag = 0;
7709
7710     for (i = 0; i < num_brc_init_kernels; i++) {
7711         if (IS_GEN8(i965->intel.device_info)) {
7712             kernel_param.curbe_size = gen8_brc_curbe_size[i];
7713         } else {
7714             kernel_param.curbe_size = gen9_brc_curbe_size[i];
7715         }
7716         gpe_context = &kernel_context->gpe_contexts[i];
7717         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7719
7720         memset(&common_kernel, 0, sizeof(common_kernel));
7721
7722         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723                                              generic_context->enc_kernel_size,
7724                                              INTEL_GENERIC_ENC_BRC,
7725                                              i,
7726                                              &common_kernel);
7727
7728         gpe->load_kernels(ctx,
7729                           gpe_context,
7730                           &common_kernel,
7731                           1);
7732     }
7733
7734 }
7735
7736 static void
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738                         struct generic_encoder_context *generic_context,
7739                         struct gen_avc_wp_context *kernel_context)
7740 {
7741     struct i965_driver_data *i965 = i965_driver_data(ctx);
7742     struct i965_gpe_table *gpe = &i965->gpe_table;
7743     struct i965_gpe_context *gpe_context = NULL;
7744     struct encoder_kernel_parameter kernel_param ;
7745     struct encoder_scoreboard_parameter scoreboard_param;
7746     struct i965_kernel common_kernel;
7747
7748     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749     kernel_param.inline_data_size = 0;
7750     kernel_param.sampler_size = 0;
7751
7752     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753     scoreboard_param.mask = 0xFF;
7754     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756     scoreboard_param.walkpat_flag = 0;
7757
7758     gpe_context = &kernel_context->gpe_contexts;
7759     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7761
7762     memset(&common_kernel, 0, sizeof(common_kernel));
7763
7764     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765                                          generic_context->enc_kernel_size,
7766                                          INTEL_GENERIC_ENC_WP,
7767                                          0,
7768                                          &common_kernel);
7769
7770     gpe->load_kernels(ctx,
7771                       gpe_context,
7772                       &common_kernel,
7773                       1);
7774
7775 }
7776
7777 static void
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779                          struct generic_encoder_context *generic_context,
7780                          struct gen_avc_sfd_context *kernel_context)
7781 {
7782     struct i965_driver_data *i965 = i965_driver_data(ctx);
7783     struct i965_gpe_table *gpe = &i965->gpe_table;
7784     struct i965_gpe_context *gpe_context = NULL;
7785     struct encoder_kernel_parameter kernel_param ;
7786     struct encoder_scoreboard_parameter scoreboard_param;
7787     struct i965_kernel common_kernel;
7788
7789     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790     kernel_param.inline_data_size = 0;
7791     kernel_param.sampler_size = 0;
7792
7793     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794     scoreboard_param.mask = 0xFF;
7795     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797     scoreboard_param.walkpat_flag = 0;
7798
7799     gpe_context = &kernel_context->gpe_contexts;
7800     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7802
7803     memset(&common_kernel, 0, sizeof(common_kernel));
7804
7805     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806                                          generic_context->enc_kernel_size,
7807                                          INTEL_GENERIC_ENC_SFD,
7808                                          0,
7809                                          &common_kernel);
7810
7811     gpe->load_kernels(ctx,
7812                       gpe_context,
7813                       &common_kernel,
7814                       1);
7815
7816 }
7817
7818 static void
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7820 {
7821
7822     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824     struct i965_gpe_table *gpe = &i965->gpe_table;
7825
7826     int i = 0;
7827
7828     gen9_avc_free_resources(vme_context);
7829
7830     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7832
7833     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7835
7836     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7838
7839     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7841
7842     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7843
7844     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7845
7846     gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
7847
7848 }
7849
7850 /*
7851 vme pipeline
7852 */
7853 static void
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7855                            VAProfile profile,
7856                            struct encode_state *encode_state,
7857                            struct intel_encoder_context *encoder_context)
7858 {
7859     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862     VAEncSequenceParameterBufferH264 *seq_param;
7863     VAEncSliceParameterBufferH264 *slice_param;
7864     VAEncMiscParameterBuffer *fei_misc_param;
7865     int i, j, slice_index;
7866     unsigned int preset = generic_state->preset;
7867     unsigned int fei_enabled = encoder_context->fei_enabled;
7868
7869     /* seq/pic/slice parameter setting */
7870     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7872
7873     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
7875
7876     if (fei_enabled &&
7877         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878         fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879         avc_state->fei_framectl_param =
7880             (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7881     }
7882
7883     avc_state->slice_num = 0;
7884     slice_index = 0;
7885     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888             avc_state->slice_param[slice_index] = slice_param;
7889             slice_param++;
7890             slice_index++;
7891             avc_state->slice_num++;
7892         }
7893     }
7894
7895     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896     seq_param = avc_state->seq_param;
7897     slice_param = avc_state->slice_param[0];
7898
7899     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7900
7901     if (slice_param->slice_type == SLICE_TYPE_I ||
7902         slice_param->slice_type == SLICE_TYPE_SI)
7903         generic_state->frame_type = SLICE_TYPE_I;
7904     else if (slice_param->slice_type == SLICE_TYPE_P)
7905         generic_state->frame_type = SLICE_TYPE_P;
7906     else if (slice_param->slice_type == SLICE_TYPE_B)
7907         generic_state->frame_type = SLICE_TYPE_B;
7908     if (profile == VAProfileH264High)
7909         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7910     else
7911         avc_state->transform_8x8_mode_enable = 0;
7912
7913     /* rc init*/
7914     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918         generic_state->frames_per_100s = 3000; /* 30fps */
7919     }
7920
7921     generic_state->gop_size = seq_param->intra_period;
7922     generic_state->gop_ref_distance = seq_param->ip_period;
7923
7924     if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925         generic_state->max_bit_rate = generic_state->target_bit_rate;
7926         generic_state->min_bit_rate = generic_state->target_bit_rate;
7927     }
7928
7929     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7931     }
7932
7933     generic_state->preset = encoder_context->quality_level;
7934     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935         generic_state->preset = INTEL_PRESET_RT_SPEED;
7936     }
7937     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7938
7939     if (!generic_state->brc_inited) {
7940         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7944     }
7945
7946
7947     generic_state->curr_pak_pass = 0;
7948     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7949
7950     if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951         generic_state->internal_rate_mode == VA_RC_VBR)
7952         generic_state->brc_enabled = 1;
7953     else
7954         generic_state->brc_enabled = 0;
7955
7956     if (generic_state->brc_enabled &&
7957         (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958          !generic_state->vbv_buffer_size_in_bit ||
7959          !generic_state->max_bit_rate ||
7960          !generic_state->target_bit_rate ||
7961          !generic_state->frames_per_100s)) {
7962         WARN_ONCE("Rate control parameter is required for BRC\n");
7963         generic_state->brc_enabled = 0;
7964     }
7965
7966     if (!generic_state->brc_enabled) {
7967         generic_state->target_bit_rate = 0;
7968         generic_state->max_bit_rate = 0;
7969         generic_state->min_bit_rate = 0;
7970         generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971         generic_state->vbv_buffer_size_in_bit = 0;
7972         generic_state->num_pak_passes = 1;
7973     } else {
7974         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7975     }
7976
7977
7978     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7982
7983     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
7986     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7987
7988     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
7991     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7992
7993     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
7996     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7997
7998     if (generic_state->hme_supported) {
7999         generic_state->hme_enabled = 1;
8000     } else {
8001         generic_state->hme_enabled = 0;
8002     }
8003
8004     if (generic_state->b16xme_supported) {
8005         generic_state->b16xme_enabled = 1;
8006     } else {
8007         generic_state->b16xme_enabled = 0;
8008     }
8009
8010     if (generic_state->b32xme_supported) {
8011         generic_state->b32xme_enabled = 1;
8012     } else {
8013         generic_state->b32xme_enabled = 0;
8014     }
8015     /* disable HME/16xME if the size is too small */
8016     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017         generic_state->b32xme_supported = 0;
8018         generic_state->b32xme_enabled = 0;
8019         generic_state->b16xme_supported = 0;
8020         generic_state->b16xme_enabled = 0;
8021         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8023     }
8024     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025         generic_state->b32xme_supported = 0;
8026         generic_state->b32xme_enabled = 0;
8027         generic_state->b16xme_supported = 0;
8028         generic_state->b16xme_enabled = 0;
8029         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8031     }
8032
8033     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034         generic_state->b32xme_supported = 0;
8035         generic_state->b32xme_enabled = 0;
8036         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8038     }
8039     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040         generic_state->b32xme_supported = 0;
8041         generic_state->b32xme_enabled = 0;
8042         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8044     }
8045
8046     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8049     }
8050     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8053     }
8054
8055 }
8056
/*
 * Validate and derive the per-frame encoding controls before any GPE
 * kernel is submitted.
 *
 * From the application-supplied picture/slice parameters and the selected
 * rate-control mode this routine decides:
 *   - the internal rate-control mode (CBR/VBR, else CQP), the BRC enables
 *     and the number of PAK passes,
 *   - HME / SFD (static frame detection) / CAF (check all fractional) /
 *     flatness-check / MB-status enables for this frame type and preset,
 *   - the slice layout (uniform slice height vs. arbitrary MB count),
 *   - the bi-prediction weight for B frames,
 *   - the inter rounding value consumed by the MbEnc kernel.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    int i = 0;
    /* QP of the first slice; indexes the adaptive rounding tables below. */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /*avbr init*/
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Map the VA rate-control flags onto the internally supported modes;
     * anything other than CBR/VBR falls back to CQP. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    /* Any true rate-control mode enables BRC with the maximum PAK pass count. */
    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /*check brc parameter: per-MB QP input is incompatible with BRC */
    if (generic_state->brc_enabled) {
        avc_state->mb_qp_data_enable = 0;
    }

    /*set the brc init and reset accordingly*/
    if (generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
         rate_control_mode == VA_RC_CQP)) {
        generic_state->brc_need_reset = 0;// not supported by CQP
    }
    /* SFD is only used for non-I frames under CQP-style control. */
    if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
        avc_state->sfd_enable = 0;
    } else {
        avc_state->sfd_enable = 1;
    }

    /* Clamp the BRC sliding-window size to [frame rate, 60] frames. */
    if (generic_state->frames_per_window_size == 0) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    }

    if (generic_state->brc_enabled) {
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        /* A fixed min/max QP leaves nothing for additional PAK passes to do. */
        if (avc_state->min_max_qp_enable) {
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    } else {
        generic_state->num_pak_passes = 1;// CQP only one pass
    }

    /* The I-frame distortion MbEnc pass feeds the BRC distortion surface. */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /*CAF (check all fractional) check: per-preset enable bits, bit0 = P, bit1 = B */
    if (avc_state->caf_supported) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            avc_state->caf_enable = 0;
            break;
        case SLICE_TYPE_P:
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        /* Optionally disable CAF at HD resolution (>= 1280x720). */
        if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
            if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if (avc_state->flatness_check_supported) {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    } else {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enable: the MB status buffer is needed by
     * the adaptive transform decision */
    if (avc_state->adaptive_transform_decision_enable) {
        avc_state->mb_status_enable = 1;
    } else {
        avc_state->mb_status_enable = 0;
    }
    /*slice check, all the slices use the same slice height except the last slice*/
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for (i = 0; i < avc_state->slice_num; i++) {
        if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
            avc_state->arbitrary_num_mbs_in_slice = 1;
            avc_state->slice_height = 1; /* slice height will be ignored by kernel and here set it as default value */
        } else {
            avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        }
    }

    /* No motion estimation on intra frames. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        generic_state->hme_enabled = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->b32xme_enabled = 0;
    }

    /* B frames need the POC distance scale factors for the bi-pred weight. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
                                             && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality kernel mode: enable trellis quantization and force MB-level BRC. */
    if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if (generic_state->brc_enabled) {
            generic_state->mb_brc_enabled = 1;
        }
    }

    //check the inter rounding: 255 (AVC_INVALID_ROUNDING_VALUE) means
    //"no explicit override, pick from the preset/adaptive tables below"
    avc_state->rounding_value = 0;
    avc_state->rounding_inter_p = 255;//default
    avc_state->rounding_inter_b = 255; //default
    avc_state->rounding_inter_b_ref = 255; //default

    if (generic_state->frame_type == SLICE_TYPE_P) {
        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
            /* adaptive rounding is QP-driven and only valid without BRC */
            if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
                if (generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            } else {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        } else {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        /* B frames used as references get their own rounding table. */
        if (pic_param->pic_fields.bits.reference_pic_flag) {
            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        } else {
            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
                if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            } else {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }
    return VA_STATUS_SUCCESS;
}
8245
/*
 * Bind all per-frame surfaces and buffers to GPE resources ahead of the
 * VME kernel launch:
 *   - the reconstructed surface (forced to NV12) plus its top/bottom
 *     direct-MV buffers and private AVC surface state (POC, QP,
 *     frame_store_id),
 *   - the raw input YUV surface,
 *   - each valid reference surface with its DMV buffers and POCs,
 *   - the coded buffer: bitstream output region and the status segment,
 *     which is cleared here while the buffer is mapped,
 *   - the L0/L1 reference index mapping from the slice RefPicLists into
 *     encode_state->reference_objects.
 *
 * Returns VA_STATUS_SUCCESS, the first surface allocation/check error, or
 * VA_STATUS_ERROR_INVALID_VALUE if the active reference count exceeds the
 * internal list capacity.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface,frame_store_id,qp_value*/
        /* The last two DMV slots are reserved for the current frame:
         * free any stale resource, then rebind the current surface's
         * top/bottom DMV buffers and record its POCs. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each valid reference and its DMV pair,
     * record its POCs; stop at the first empty slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream: output region starts after the coded-buffer header
     * and the end offset keeps one 4K page of headroom. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer shares the coded buffer's bo */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* clear the status region before the kernels write into it */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Active reference counts come from the picture parameters, optionally
     * overridden per-slice via num_ref_idx_active_override_flag. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Resolve each RefPicList0 entry to its index in reference_objects. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
8442
8443 static VAStatus
8444 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8445                              struct encode_state *encode_state,
8446                              struct intel_encoder_context *encoder_context)
8447 {
8448     return VA_STATUS_SUCCESS;
8449 }
8450
8451 static VAStatus
8452 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8453                               struct encode_state *encode_state,
8454                               struct intel_encoder_context *encoder_context)
8455 {
8456
8457     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8458     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8459     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8460
8461     /*set this flag when all kernel is finished*/
8462     if (generic_state->brc_enabled) {
8463         generic_state->brc_inited = 1;
8464         generic_state->brc_need_reset = 0;
8465         avc_state->mbenc_curbe_set_in_brc_update = 0;
8466     }
8467     return VA_STATUS_SUCCESS;
8468 }
8469
/*
 * Submit the VME GPE kernels for one frame, in dependency order:
 *   1. BRC init/reset (must precede HME: it resets the BRC distortion
 *      surface),
 *   2. scaling kernels (4x, then 16x/32x when supported),
 *   3. motion estimation from coarsest to finest (32x -> 16x -> 4x),
 *   4. static frame detection (SFD) right after HME,
 *   5. BRC frame update (plus the I-frame distortion MbEnc pass and the
 *      optional MB-level BRC update),
 *   6. weighted-prediction kernels when explicit WP is requested,
 *   7. the main MbEnc kernel.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    int fei_enabled = encoder_context->fei_enabled;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling: each level feeds the next coarser one */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel: run coarsest first so its MVs seed the finer levels */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* I-frame distortion pass populates the BRC distortion surface */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction, disabled by now: run the WP kernel per list when the
     * app supplies explicit weights, otherwise clear the WP picture flags */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel: the main per-MB encoding pass (false = not I-dist) */
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
8561
8562 static VAStatus
8563 gen9_avc_vme_pipeline(VADriverContextP ctx,
8564                       VAProfile profile,
8565                       struct encode_state *encode_state,
8566                       struct intel_encoder_context *encoder_context)
8567 {
8568     VAStatus va_status;
8569
8570     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8571
8572     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8573     if (va_status != VA_STATUS_SUCCESS)
8574         return va_status;
8575
8576     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8577     if (va_status != VA_STATUS_SUCCESS)
8578         return va_status;
8579
8580     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8581     if (va_status != VA_STATUS_SUCCESS)
8582         return va_status;
8583
8584     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8585     if (va_status != VA_STATUS_SUCCESS)
8586         return va_status;
8587
8588     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8589     if (va_status != VA_STATUS_SUCCESS)
8590         return va_status;
8591
8592     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8593
8594     return VA_STATUS_SUCCESS;
8595 }
8596
8597 /* Update PreEnc specific parameters */
8598 static VAStatus
8599 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8600                                   VAProfile profile,
8601                                   struct encode_state *encode_state,
8602                                   struct intel_encoder_context *encoder_context)
8603 {
8604     struct i965_driver_data *i965 = i965_driver_data(ctx);
8605     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8606     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8607     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8608     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8609     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8610     VAStatsStatisticsParameter *stat_param = NULL;
8611     struct object_buffer *obj_buffer = NULL;
8612     struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8613     struct buffer_store *buffer_store = NULL;
8614     unsigned int size = 0, i = 0;
8615     unsigned int frame_mb_nums = 0;
8616
8617     if (!encoder_context->preenc_enabled ||
8618         !encode_state->stat_param_ext ||
8619         !encode_state->stat_param_ext->buffer)
8620         return VA_STATUS_ERROR_OPERATION_FAILED;
8621
8622     stat_param_h264 = avc_state->stat_param =
8623                           (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8624     stat_param = &stat_param_h264->stats_params;
8625
8626     /* Assume the frame type based on number of past/future ref frames */
8627     if (!stat_param->num_past_references && !stat_param->num_future_references)
8628         generic_state->frame_type = SLICE_TYPE_I;
8629     else if (stat_param->num_future_references > 0)
8630         generic_state->frame_type = SLICE_TYPE_B;
8631     else
8632         generic_state->frame_type = SLICE_TYPE_P;
8633
8634     generic_state->preset = INTEL_PRESET_RT_SPEED;
8635     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8636
8637     /* frame width and height */
8638     generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8639     generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8640     generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8641     generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8642
8643     /* 4x downscaled width and height */
8644     generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8645     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8646     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
8647     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8648
8649     /* reset hme types for preenc */
8650     if (generic_state->frame_type != SLICE_TYPE_I)
8651         generic_state->hme_enabled = 1;
8652
8653     /* ensure frame width is not too small */
8654     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8655         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8656         generic_state->downscaled_width_4x_in_mb =
8657             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8658     }
8659
8660     /* ensure frame height is not too small*/
8661     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8662         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8663         generic_state->downscaled_height_4x_in_mb =
8664             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8665     }
8666
8667     /********** Ensure buffer object parameters ********/
8668     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8669
8670     /* mv predictor buffer */
8671     if (stat_param_h264->mv_predictor_ctrl) {
8672         if (stat_param->mv_predictor == VA_INVALID_ID)
8673             goto error;
8674         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8675         obj_buffer = BUFFER(stat_param->mv_predictor);
8676         buffer_store = obj_buffer->buffer_store;
8677         if (buffer_store->bo->size < size)
8678             goto error;
8679         if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8680             i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8681         i965_dri_object_to_buffer_gpe_resource(
8682             &avc_ctx->preproc_mv_predictor_buffer,
8683             buffer_store->bo);
8684     }
8685
8686     /* MB qp buffer */
8687     if (stat_param_h264->mb_qp) {
8688         if (stat_param->qp == VA_INVALID_ID)
8689             goto error;
8690         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8691         obj_buffer = BUFFER(stat_param->qp);
8692         buffer_store = obj_buffer->buffer_store;
8693         if (buffer_store->bo->size < size)
8694             goto error;
8695         if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8696             i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8697         i965_dri_object_to_buffer_gpe_resource(
8698             &avc_ctx->preproc_mb_qp_buffer,
8699             buffer_store->bo);
8700     }
8701
8702     /* locate mv and stat buffer */
8703     if (!stat_param_h264->disable_mv_output ||
8704         !stat_param_h264->disable_statistics_output) {
8705
8706         if (!stat_param->outputs)
8707             goto error;
8708
8709         for (i = 0; i < 2 ; i++) {
8710             if (stat_param->outputs[i] != VA_INVALID_ID) {
8711                 obj_buffer = BUFFER(stat_param->outputs[i]);
8712                 switch (obj_buffer->type) {
8713                 case VAStatsMVBufferType:
8714                     obj_buffer_mv = obj_buffer;
8715                     break;
8716                 case VAStatsStatisticsBufferType:
8717                     obj_buffer_stat = obj_buffer;
8718                     break;
8719                 default:
8720                     assert(0);
8721                 }
8722             }
8723             if (!(!stat_param_h264->disable_mv_output &&
8724                   !stat_param_h264->disable_statistics_output))
8725                 break;
8726         }
8727     }
8728     /* mv data output buffer */
8729     if (!stat_param_h264->disable_mv_output) {
8730         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8731         buffer_store = obj_buffer_mv->buffer_store;
8732         if (buffer_store->bo->size < size)
8733             goto error;
8734         if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8735             i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8736         i965_dri_object_to_buffer_gpe_resource(
8737             &avc_ctx->preproc_mv_data_out_buffer,
8738             buffer_store->bo);
8739     }
8740     /* statistics output buffer */
8741     if (!stat_param_h264->disable_statistics_output) {
8742         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8743         buffer_store = obj_buffer_stat->buffer_store;
8744         if (buffer_store->bo->size < size)
8745             goto error;
8746         if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8747             i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8748         i965_dri_object_to_buffer_gpe_resource(
8749             &avc_ctx->preproc_stat_data_out_buffer,
8750             buffer_store->bo);
8751     }
8752
8753     /* past ref stat out buffer */
8754     if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8755         stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8756         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8757         obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8758         buffer_store = obj_buffer->buffer_store;
8759         if (buffer_store->bo->size < size)
8760             goto error;
8761         if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8762             i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8763         i965_dri_object_to_buffer_gpe_resource(
8764             &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8765             buffer_store->bo);
8766     }
    /* future ref stat out buffer
     * NOTE(review): the condition below tests num_past_references while
     * handling future_ref_stat_buf — likely should be
     * num_future_references; confirm against the libva FEI spec. */
8768     if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8769         stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8770         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8771         obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8772         buffer_store = obj_buffer->buffer_store;
8773         if (buffer_store->bo->size < size)
8774             goto error;
8775         if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8776             i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8777         i965_dri_object_to_buffer_gpe_resource(
8778             &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8779             buffer_store->bo);
8780     }
8781     return VA_STATUS_SUCCESS;
8782
8783 error:
8784     return VA_STATUS_ERROR_INVALID_BUFFER;
8785 }
8786
8787 /* allocate internal resouces required for PreEenc */
8788 static VAStatus
8789 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8790                                             struct encode_state *encode_state,
8791                                             struct intel_encoder_context *encoder_context)
8792 {
8793     struct i965_driver_data *i965 = i965_driver_data(ctx);
8794     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8795     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8796     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8797     unsigned int width  = 0;
8798     unsigned int height  = 0;
8799     unsigned int size  = 0;
8800     int allocate_flag = 1;
8801
8802     /* 4x MEMV data buffer */
8803     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8804     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8805     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8806     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8807                                                   &avc_ctx->s4x_memv_data_buffer,
8808                                                   width, height,
8809                                                   width,
8810                                                   "4x MEMV data buffer");
8811     if (!allocate_flag)
8812         goto failed_allocation;
8813     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8814
8815     /*  Output DISTORTION surface from 4x ME */
8816     width = generic_state->downscaled_width_4x_in_mb * 8;
8817     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8818     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8819     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8820                                                   &avc_ctx->s4x_memv_distortion_buffer,
8821                                                   width, height,
8822                                                   ALIGN(width, 64),
8823                                                   "4x MEMV distortion buffer");
8824     if (!allocate_flag)
8825         goto failed_allocation;
8826     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8827
8828     /* output BRC DISTORTION surface from 4x ME  */
8829     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8830     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8831     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8832     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8833                                                   &avc_ctx->res_brc_dist_data_surface,
8834                                                   width, height,
8835                                                   width,
8836                                                   "brc dist data buffer");
8837     if (!allocate_flag)
8838         goto failed_allocation;
8839     i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8840
8841
8842     /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
8843     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8844     size = 16 * AVC_QP_MAX * 4;
8845     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8846                                                &avc_ctx->res_mbbrc_const_data_buffer,
8847                                                ALIGN(size, 0x1000),
8848                                                "mbbrc const data buffer");
8849     if (!allocate_flag)
8850         goto failed_allocation;
8851     i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8852
8853     /* 4x downscaled surface  */
8854     if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8855         i965_CreateSurfaces(ctx,
8856                             generic_state->frame_width_4x,
8857                             generic_state->frame_height_4x,
8858                             VA_RT_FORMAT_YUV420,
8859                             1,
8860                             &avc_ctx->preenc_scaled_4x_surface_id);
8861         avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8862         if (!avc_ctx->preenc_scaled_4x_surface_obj)
8863             goto failed_allocation;
8864         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8865                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8866     }
8867
8868     /* 4x downscaled past ref surface  */
8869     if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8870         i965_CreateSurfaces(ctx,
8871                             generic_state->frame_width_4x,
8872                             generic_state->frame_height_4x,
8873                             VA_RT_FORMAT_YUV420,
8874                             1,
8875                             &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8876         avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8877             SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8878         if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8879             goto failed_allocation;
8880         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8881                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8882     }
8883
8884     /* 4x downscaled future ref surface  */
8885     if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8886         i965_CreateSurfaces(ctx,
8887                             generic_state->frame_width_4x,
8888                             generic_state->frame_height_4x,
8889                             VA_RT_FORMAT_YUV420,
8890                             1,
8891                             &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8892         avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8893             SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8894         if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8895             goto failed_allocation;
8896         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8897                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8898     }
8899
8900     /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8901      * the generic AVC Encdoe codepath which allocate status buffer as extension
8902      * to CodedBuffer */
8903     if (!avc_ctx->status_buffer.bo) {
8904         size =
8905             generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8906         size += I965_CODEDBUFFER_HEADER_SIZE;
8907         size += 0x1000;
8908         avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8909                                                  "Dummy Coded Buffer",
8910                                                  size, 64);
8911     }
8912
8913     return VA_STATUS_SUCCESS;
8914
8915 failed_allocation:
8916     return VA_STATUS_ERROR_ALLOCATION_FAILED;
8917 }
8918
8919
8920 static VAStatus
8921 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8922                                struct encode_state *encode_state,
8923                                struct intel_encoder_context *encoder_context)
8924 {
8925     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8926     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8927     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8928     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8929     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8930
8931     /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8932      * to avoid repeated scaling of same surfaces */
8933
8934     /* down scaling */
8935     gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8936                                    INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8937     if (stat_param->num_past_references > 0) {
8938         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8939                                        INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8940     }
8941     if (stat_param->num_future_references > 0) {
8942         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8943                                        INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8944     }
8945
8946     /* me kernel */
8947     if (generic_state->hme_enabled) {
8948         gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8949     }
8950
8951     /* preproc kernel */
8952     if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8953         gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8954     }
8955
8956     return VA_STATUS_SUCCESS;
8957 }
8958
8959 static VAStatus
8960 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8961                          VAProfile profile,
8962                          struct encode_state *encode_state,
8963                          struct intel_encoder_context *encoder_context)
8964 {
8965     VAStatus va_status;
8966
8967     va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8968     if (va_status != VA_STATUS_SUCCESS)
8969         return va_status;
8970
8971     va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8972     if (va_status != VA_STATUS_SUCCESS)
8973         return va_status;
8974
8975     va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8976     if (va_status != VA_STATUS_SUCCESS)
8977         return va_status;
8978
8979     return VA_STATUS_SUCCESS;
8980 }
8981
8982 static void
8983 gen9_avc_vme_context_destroy(void * context)
8984 {
8985     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8986     struct generic_encoder_context *generic_ctx;
8987     struct i965_avc_encoder_context *avc_ctx;
8988     struct generic_enc_codec_state *generic_state;
8989     struct avc_enc_state *avc_state;
8990
8991     if (!vme_context)
8992         return;
8993
8994     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8995     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8996     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8997     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8998
8999     gen9_avc_kernel_destroy(vme_context);
9000
9001     free(generic_ctx);
9002     free(avc_ctx);
9003     free(generic_state);
9004     free(avc_state);
9005     free(vme_context);
9006     return;
9007
9008 }
9009
9010 static void
9011 gen8_avc_kernel_init(VADriverContextP ctx,
9012                      struct intel_encoder_context *encoder_context)
9013 {
9014     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9015     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9016     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9017     int fei_enabled = encoder_context->fei_enabled;
9018
9019     generic_ctx->get_kernel_header_and_size = fei_enabled ?
9020                                               intel_avc_fei_get_kernel_header_and_size :
9021                                               intel_avc_get_kernel_header_and_size ;
9022     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9023     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9024     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9025     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9026     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9027
9028     //function pointer
9029     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9030     generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9031     generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9032     generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9033     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9034     generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9035     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9036
9037     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9038     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9039     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9040     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9041     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9042     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9043 }
/* Initialize the Gen9+ AVC encoder kernel contexts and callbacks.
 * Three mutually exclusive modes are wired up here: the generic VME
 * encoder, FEI (ENC) and PreEnc (STATS).  The exact CURBE scaling4x
 * setter additionally depends on the device generation. */
static void
gen9_avc_kernel_init(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    int fei_enabled = encoder_context->fei_enabled;
    int preenc_enabled = encoder_context->preenc_enabled;

    /* FEI and PreEnc share the FEI kernel binaries */
    generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
                                              intel_avc_fei_get_kernel_header_and_size :
                                              intel_avc_get_kernel_header_and_size ;

    if (!fei_enabled && !preenc_enabled) {
        /* generic AVC Encoder */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
        gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
        gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);

        //function pointer
        generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
        generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
        generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
        generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
        generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
        generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
        generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
        generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
        generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
        generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
        generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
        generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;

        /* override the scaling4x CURBE setter per device generation;
         * SKL/BXT keep the default gen9 variant assigned above */
        if (IS_SKL(i965->intel.device_info) ||
            IS_BXT(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        else if (IS_KBL(i965->intel.device_info) ||
                 IS_GEN10(i965->intel.device_info) ||
                 IS_GLK(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;

    } else if (fei_enabled) {
        /* FEI AVC Encoding: only the MBEnc kernel is needed */
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;

    } else {
        /* PreEnc for AVC: scaling + ME + preproc kernels */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
                                     encoder_context->preenc_enabled);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
                                encoder_context->preenc_enabled);
        gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);

        /* preenc 4x scaling uses the gen95 kernel */
        generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
        generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
        generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
    }
}
9122
9123 /*
9124 PAK pipeline related function
9125 */
9126 extern int
9127 intel_avc_enc_slice_type_fixup(int slice_type);
9128
9129 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
9130 static VAStatus
9131 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9132                                 struct encode_state *encode_state,
9133                                 struct intel_encoder_context *encoder_context)
9134 {
9135     struct i965_driver_data *i965 = i965_driver_data(ctx);
9136     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9137     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9138     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9139     unsigned int size  = 0;
9140     int allocate_flag = 1;
9141
9142     /*second level batch buffer for image state write when cqp etc*/
9143     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9144     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9145     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9146                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
9147                                                ALIGN(size, 0x1000),
9148                                                "second levle batch (image state write) buffer");
9149     if (!allocate_flag)
9150         goto failed_allocation;
9151
9152     if (!generic_state->brc_allocated) {
9153         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9154         size = 64;//44
9155         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9156                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9157                                                    ALIGN(size, 0x1000),
9158                                                    "brc pak statistic buffer");
9159         if (!allocate_flag)
9160             goto failed_allocation;
9161     }
9162
9163     return VA_STATUS_SUCCESS;
9164
9165 failed_allocation:
9166     return VA_STATUS_ERROR_ALLOCATION_FAILED;
9167 }
9168
/* Emit MFX_PIPE_MODE_SELECT on the BCS ring to configure the MFX fixed
 * function for AVC long-format, non-VDEnc encoding.  The exact bit
 * layout follows the MFX command definition; do not reorder DWs. */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  /* stream-out is enabled on every PAK pass except the last one */
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
9209
/* Emit MFX_SURFACE_STATE for one NV12 (planar 4:2:0, interleaved U/V)
 * surface described by @gpe_resource, tagged with surface @id.  For
 * NV12 the Cb and Cr offsets are the same, hence DW4 == DW5. */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as Cb for interleaved NV12 */

    ADVANCE_BCS_BATCH(batch);
}
9241
/* Emit MFX_PIPE_BUF_ADDR_STATE: program every buffer address the MFX
 * engine needs for AVC PAK (deblocking outputs, source surface, PAK
 * status, scratch rows, reference list).  The command is 65 DWs long,
 * extended to 68 DWs on Gen10 (CNL). */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;
    unsigned int cmd_len = 65;

    if (IS_GEN10(i965->intel.device_info))
        cmd_len = 68;

    BEGIN_BCS_BATCH(batch, cmd_len);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 65-67, additional addresses only present on CNL (Gen10) */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
9310
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE: program the indirect object base
 * addresses for the encoder — the per-surface MV data buffer (read) and
 * the compressed bitstream buffer (PAK-BSE write).  Bails out silently
 * when the reconstructed surface has no private AVC data attached. */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* 32 MVs of 4 bytes per MB */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
9375
9376 static void
9377 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9378 {
9379     struct i965_driver_data *i965 = i965_driver_data(ctx);
9380     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9381     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9382     struct intel_batchbuffer *batch = encoder_context->base.batch;
9383
9384     BEGIN_BCS_BATCH(batch, 10);
9385
9386     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9387
9388     /* The DW1-3 is for bsd/mpc row store scratch buffer */
9389     OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9390
9391     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9392     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9393
9394     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9395     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9396
9397     ADVANCE_BCS_BATCH(batch);
9398 }
9399
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-mode MV buffer
 * addresses for the reference frames and the current frame, followed by
 * the POC list the hardware uses for direct-mode scaling.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* NOTE(review): only even-indexed DMV buffers are programmed (i += 2);
     * presumably the odd entries are field halves not used for frame
     * encoding -- confirm against the DMV buffer allocation. */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* keep the fixed DW layout even when a slot is empty */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory attributes (MOCS) for the reference MV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by PAK) */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: top-field POC for the 32 reference slots, then the two
     * entries belonging to the current frame */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
9447
9448 static void
9449 gen9_mfc_qm_state(VADriverContextP ctx,
9450                   int qm_type,
9451                   const unsigned int *qm,
9452                   int qm_length,
9453                   struct intel_encoder_context *encoder_context)
9454 {
9455     struct intel_batchbuffer *batch = encoder_context->base.batch;
9456     unsigned int qm_buffer[16];
9457
9458     assert(qm_length <= 16);
9459     assert(sizeof(*qm) == 4);
9460     memset(qm_buffer, 0, 16 * 4);
9461     memcpy(qm_buffer, qm, qm_length * 4);
9462
9463     BEGIN_BCS_BATCH(batch, 18);
9464     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9465     OUT_BCS_BATCH(batch, qm_type << 0);
9466     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9467     ADVANCE_BCS_BATCH(batch);
9468 }
9469
9470 static void
9471 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9472                       struct encode_state *encode_state,
9473                       struct intel_encoder_context *encoder_context)
9474 {
9475     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9476     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9477     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
9478     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
9479
9480
9481     const unsigned int *qm_4x4_intra;
9482     const unsigned int *qm_4x4_inter;
9483     const unsigned int *qm_8x8_intra;
9484     const unsigned int *qm_8x8_inter;
9485
9486     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9487         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9488         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9489     } else {
9490         VAIQMatrixBufferH264 *qm;
9491         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9492         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9493         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9494         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9495         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9496         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
9497     }
9498
9499     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9500     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9501     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9502     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
9503 }
9504
9505 static void
9506 gen9_mfc_fqm_state(VADriverContextP ctx,
9507                    int fqm_type,
9508                    const unsigned int *fqm,
9509                    int fqm_length,
9510                    struct intel_encoder_context *encoder_context)
9511 {
9512     struct intel_batchbuffer *batch = encoder_context->base.batch;
9513     unsigned int fqm_buffer[32];
9514
9515     assert(fqm_length <= 32);
9516     assert(sizeof(*fqm) == 4);
9517     memset(fqm_buffer, 0, 32 * 4);
9518     memcpy(fqm_buffer, fqm, fqm_length * 4);
9519
9520     BEGIN_BCS_BATCH(batch, 34);
9521     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9522     OUT_BCS_BATCH(batch, fqm_type << 0);
9523     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9524     ADVANCE_BCS_BATCH(batch);
9525 }
9526
/*
 * Convert a len x len quantization matrix into its forward (reciprocal)
 * form: fqm[i][j] = 65536 / qm[j][i].  Note the transpose, and that the
 * result is truncated into a uint16_t (a qm entry of 1 yields 0).
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q);  /* scaling-list entries must be non-zero */
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
9537
/*
 * Program the four AVC forward-quantization matrices (MFX_FQM_STATE).
 *
 * Each FQM entry is the 16-bit fixed-point reciprocal (65536/QM) of the
 * corresponding scaling-list entry, transposed by gen9_mfc_fill_fqm().
 * Without app-supplied scaling lists the precomputed flat table is used.
 */
static void
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        uint32_t fqm[32];   /* filled through a uint16_t alias: 64 16-bit entries */
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* three 4x4 intra lists (Y, Cb, Cr) packed back to back: 24 DWs */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* three 4x4 inter lists: 24 DWs */
        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        /* one 8x8 list each (intra / inter): 32 DWs */
        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
9576
9577 static void
9578 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9579                            struct intel_encoder_context *encoder_context,
9580                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9581                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9582                            int slice_header_indicator,
9583                            struct intel_batchbuffer *batch)
9584 {
9585     if (data_bits_in_last_dw == 0)
9586         data_bits_in_last_dw = 32;
9587
9588     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9589
9590     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9591     OUT_BCS_BATCH(batch,
9592                   (0 << 16) |   /* always start at offset 0 */
9593                   (slice_header_indicator << 14) |
9594                   (data_bits_in_last_dw << 8) |
9595                   (skip_emul_byte_count << 4) |
9596                   (!!emulation_flag << 3) |
9597                   ((!!is_last_header) << 2) |
9598                   ((!!is_end_of_slice) << 1) |
9599                   (0 << 0));    /* check this flag */
9600     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9601
9602     ADVANCE_BCS_BATCH(batch);
9603 }
9604
/*
 * Scan slice 0's packed raw-data headers for an Access Unit Delimiter
 * NAL and, if one is found, insert it into the bitstream.  The AUD must
 * precede all other headers, so it is handled before SPS/PPS/SEI.
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
        if (param->type != VAEncPackedHeaderRawData)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* the NAL header byte sits just before the payload; low 5 bits are the NAL unit type */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;      /* at most one AUD per access unit */
        }
    }
}
9650
/*
 * Insert the packed header data belonging to one slice.
 *
 * First emits the slice's raw packed data (skipping the slice header,
 * which must come last, and any AUD, which was inserted earlier), then
 * emits the slice header itself: either the app-supplied packed slice
 * header, or one generated by the driver when none was provided.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* a stored index of 0 means the app supplied no packed slice header */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * also skip the AUD NAL, which was inserted before everything else */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header() allocated the buffer; this function owns it */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
9757
9758 static void
9759 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9760                            struct encode_state *encode_state,
9761                            struct intel_encoder_context *encoder_context,
9762                            VAEncSliceParameterBufferH264 *slice_param,
9763                            int slice_index,
9764                            struct intel_batchbuffer *batch)
9765 {
9766     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9767     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9768     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9769     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9770     unsigned int skip_emul_byte_cnt;
9771
9772     if (slice_index == 0) {
9773
9774         /* if AUD exist and insert it firstly */
9775         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9776
9777         if (encode_state->packed_header_data[idx]) {
9778             VAEncPackedHeaderParameterBuffer *param = NULL;
9779             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9780             unsigned int length_in_bits;
9781
9782             assert(encode_state->packed_header_param[idx]);
9783             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9784             length_in_bits = param->bit_length;
9785
9786             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9787             gen9_mfc_avc_insert_object(ctx,
9788                                        encoder_context,
9789                                        header_data,
9790                                        ALIGN(length_in_bits, 32) >> 5,
9791                                        length_in_bits & 0x1f,
9792                                        skip_emul_byte_cnt,
9793                                        0,
9794                                        0,
9795                                        !param->has_emulation_bytes,
9796                                        0,
9797                                        batch);
9798         }
9799
9800         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9801
9802         if (encode_state->packed_header_data[idx]) {
9803             VAEncPackedHeaderParameterBuffer *param = NULL;
9804             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9805             unsigned int length_in_bits;
9806
9807             assert(encode_state->packed_header_param[idx]);
9808             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9809             length_in_bits = param->bit_length;
9810
9811             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9812
9813             gen9_mfc_avc_insert_object(ctx,
9814                                        encoder_context,
9815                                        header_data,
9816                                        ALIGN(length_in_bits, 32) >> 5,
9817                                        length_in_bits & 0x1f,
9818                                        skip_emul_byte_cnt,
9819                                        0,
9820                                        0,
9821                                        !param->has_emulation_bytes,
9822                                        0,
9823                                        batch);
9824         }
9825
9826         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9827
9828         if (encode_state->packed_header_data[idx]) {
9829             VAEncPackedHeaderParameterBuffer *param = NULL;
9830             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9831             unsigned int length_in_bits;
9832
9833             assert(encode_state->packed_header_param[idx]);
9834             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9835             length_in_bits = param->bit_length;
9836
9837             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9838             gen9_mfc_avc_insert_object(ctx,
9839                                        encoder_context,
9840                                        header_data,
9841                                        ALIGN(length_in_bits, 32) >> 5,
9842                                        length_in_bits & 0x1f,
9843                                        skip_emul_byte_cnt,
9844                                        0,
9845                                        0,
9846                                        !param->has_emulation_bytes,
9847                                        0,
9848                                        batch);
9849         } else if (internal_rate_mode == VA_RC_CBR) {
9850             /* insert others */
9851         }
9852     }
9853
9854     gen9_mfc_avc_insert_slice_packed_data(ctx,
9855                                           encode_state,
9856                                           encoder_context,
9857                                           slice_index,
9858                                           batch);
9859 }
9860
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice.
 *
 * Derives the per-slice parameters the PAK hardware needs: slice type,
 * active reference counts (honoring num_ref_idx_active_override_flag),
 * weighted-prediction denominators, slice QP, slice start / next-slice
 * MB positions, BRC-related flags for multi-pass rate control, and the
 * inter rounding controls.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* convert the slice's first MB address to (x, y) in MB units */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" position is the end of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* enable the rate-control counters only on re-encode (PAK) passes */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* RC panic only on the very last pass, and never together with min/max QP clamping */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: byte offset of this slice in the PAK-BSE output buffer */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
10009
10010 static uint8_t
10011 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10012 {
10013     unsigned int is_long_term =
10014         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10015     unsigned int is_top_field =
10016         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10017     unsigned int is_bottom_field =
10018         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
10019
10020     return ((is_long_term                         << 6) |
10021             (0 << 5) |
10022             (frame_store_id                       << 1) |
10023             ((is_top_field ^ 1) & is_bottom_field));
10024 }
10025
/*
 * Emit the MFX_AVC_REF_IDX_STATE command(s) for the current slice.
 *
 * Builds the packed reference-index remap table for list L0 (P and B
 * slices) and, for B slices, a second table for list L1, then writes
 * them into @batch.  At most 4 active references per list are
 * programmed; each active entry is one byte produced by
 * gen9_mfc_avc_get_ref_idx_state(), and 0x80 marks an unused entry.
 */
static void
gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           VAEncSliceParameterBufferH264 *slice_param,
                           struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAPictureH264 *ref_pic;
    int i, slice_type, ref_idx_shift;
    unsigned int fwd_ref_entry;
    unsigned int bwd_ref_entry;

    /* max 4 ref frames are allowed for l0 and l1 */
    /* 0x80 in every byte position = "no reference" default */
    fwd_ref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if ((slice_type == SLICE_TYPE_P) ||
        (slice_type == SLICE_TYPE_B)) {
        /* Replace one byte per active L0 reference with its packed state. */
        for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
            ref_pic = &slice_param->RefPicList0[i];
            ref_idx_shift = i * 8;

            fwd_ref_entry &= ~(0xFF << ref_idx_shift);
            fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
        }
    }

    bwd_ref_entry = 0x80808080;
    if (slice_type == SLICE_TYPE_B) {
        /* Same packing for the L1 list (B slices only). */
        for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
            ref_pic = &slice_param->RefPicList1[i];
            ref_idx_shift = i * 8;

            bwd_ref_entry &= ~(0xFF << ref_idx_shift);
            bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
        }
    }

    if ((slice_type == SLICE_TYPE_P) ||
        (slice_type == SLICE_TYPE_B)) {
        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
        OUT_BCS_BATCH(batch, 0);                        // L0
        OUT_BCS_BATCH(batch, fwd_ref_entry);

        /* Remaining 7 DWs: all entries marked unused (0x80 per byte). */
        for (i = 0; i < 7; i++) {
            OUT_BCS_BATCH(batch, 0x80808080);
        }

        ADVANCE_BCS_BATCH(batch);
    }

    if (slice_type == SLICE_TYPE_B) {
        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
        OUT_BCS_BATCH(batch, 1);                  //Select L1
        OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
        for (i = 0; i < 7; i++) {
            OUT_BCS_BATCH(batch, 0x80808080);
        }
        ADVANCE_BCS_BATCH(batch);
    }
}
10091
10092 static void
10093 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10094                                 struct encode_state *encode_state,
10095                                 struct intel_encoder_context *encoder_context,
10096                                 VAEncPictureParameterBufferH264 *pic_param,
10097                                 VAEncSliceParameterBufferH264 *slice_param,
10098                                 struct intel_batchbuffer *batch)
10099 {
10100     int i, slice_type;
10101     short weightoffsets[32 * 6];
10102
10103     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10104
10105     if (slice_type == SLICE_TYPE_P &&
10106         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10107         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10108         for (i = 0; i < 32; i++) {
10109             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10110             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10111             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10112             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10113             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10114             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10115         }
10116
10117         BEGIN_BCS_BATCH(batch, 98);
10118         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10119         OUT_BCS_BATCH(batch, 0);
10120         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10121
10122         ADVANCE_BCS_BATCH(batch);
10123     }
10124
10125     if (slice_type == SLICE_TYPE_B &&
10126         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10127         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10128         for (i = 0; i < 32; i++) {
10129             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10130             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10131             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10132             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10133             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10134             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10135         }
10136
10137         BEGIN_BCS_BATCH(batch, 98);
10138         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10139         OUT_BCS_BATCH(batch, 0);
10140         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10141         ADVANCE_BCS_BATCH(batch);
10142
10143         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10144         for (i = 0; i < 32; i++) {
10145             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10146             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10147             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10148             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10149             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10150             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10151         }
10152
10153         BEGIN_BCS_BATCH(batch, 98);
10154         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10155         OUT_BCS_BATCH(batch, 1);
10156         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10157         ADVANCE_BCS_BATCH(batch);
10158     }
10159 }
10160
/*
 * Program the PAK pipeline for a single slice.
 *
 * On the first PAK pass the per-slice commands (ref idx state,
 * weight/offset state, slice state, packed headers) are recorded into
 * the shared second-level slice batch buffer and the start offset is
 * saved per slice; subsequent passes replay the recorded commands from
 * the saved offset instead of regenerating them.  The slice commands
 * and the VME-produced MB code for this slice are then chained into the
 * main batch as second-level batch buffers.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands and remember where
         * they start so later passes can replay them. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded slice segment. */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-pak pass: replay the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* offset of this slice's first MB: 16 DWs (64 bytes) of MB code per macroblock */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
10235
10236 static void
10237 gen9_avc_pak_slice_level(VADriverContextP ctx,
10238                          struct encode_state *encode_state,
10239                          struct intel_encoder_context *encoder_context)
10240 {
10241     struct i965_driver_data *i965 = i965_driver_data(ctx);
10242     struct i965_gpe_table *gpe = &i965->gpe_table;
10243     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10244     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10245     struct intel_batchbuffer *batch = encoder_context->base.batch;
10246     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10247     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10248     int i, j;
10249     int slice_index = 0;
10250     int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
10251     int has_tail = 0;             /* check it later */
10252
10253     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10254         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10255
10256         if (j == encode_state->num_slice_params_ext - 1)
10257             next_slice_group_param = NULL;
10258         else
10259             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
10260
10261         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10262             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10263                 next_slice_param = slice_param + 1;
10264             else
10265                 next_slice_param = next_slice_group_param;
10266
10267             gen9_mfc_avc_single_slice(ctx,
10268                                       encode_state,
10269                                       encoder_context,
10270                                       slice_param,
10271                                       next_slice_param,
10272                                       slice_index);
10273             slice_param++;
10274             slice_index++;
10275
10276             if (is_frame_level)
10277                 break;
10278         }
10279
10280         if (is_frame_level)
10281             break;
10282     }
10283
10284     if (has_tail) {
10285         /* insert a tail if required */
10286     }
10287
10288     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10289     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10290     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
10291 }
/*
 * Emit the frame-level (picture-level) MFX PAK commands: pipe mode
 * select, surface/buffer address states, the AVC image state (either
 * BRC-generated or freshly built for non-BRC), QM/FQM matrices and the
 * direct-mode state.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* NOTE(review): conditional batch-buffer-end keyed on the image
         * status mask — presumably skips this re-pak pass when the
         * previous pass already satisfied the BRC constraints; confirm
         * against the BRC kernel's status-mask semantics. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Fixed MFX command sequence; ordering matters. */
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* Replay the image state that the BRC kernel produced for the
         * current pass (one INTEL_AVC_IMAGE_STATE_CMD_SIZE slot per pass). */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
10352
10353 static void
10354 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10355 {
10356     struct i965_driver_data *i965 = i965_driver_data(ctx);
10357     struct i965_gpe_table *gpe = &i965->gpe_table;
10358     struct intel_batchbuffer *batch = encoder_context->base.batch;
10359     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10360     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10361     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10362
10363     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
10364     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
10365     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
10366     struct encoder_status_buffer_internal *status_buffer;
10367
10368     status_buffer = &(avc_ctx->status_buffer);
10369
10370     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10371     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10372
10373     /* read register and store into status_buffer and pak_statitistic info */
10374     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
10375     mi_store_reg_mem_param.bo = status_buffer->bo;
10376     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
10377     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10378     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10379
10380     mi_store_reg_mem_param.bo = status_buffer->bo;
10381     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
10382     mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
10383     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10384
10385     /*update the status in the pak_statistic_surface */
10386     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10387     mi_store_reg_mem_param.offset = 0;
10388     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10389     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10390
10391     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10392     mi_store_reg_mem_param.offset = 4;
10393     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
10394     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10395
10396     memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
10397     mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10398     mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
10399     mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
10400     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
10401
10402     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10403     mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
10404     mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10405     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10406
10407     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10408     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10409
10410     return;
10411 }
10412
10413 static void
10414 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10415                          struct intel_encoder_context *encoder_context)
10416 {
10417     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10418     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10419     unsigned int rate_control_mode = encoder_context->rate_control_mode;
10420
10421     switch (rate_control_mode & 0x7f) {
10422     case VA_RC_CBR:
10423         generic_state->internal_rate_mode = VA_RC_CBR;
10424         break;
10425
10426     case VA_RC_VBR:
10427         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10428         break;
10429
10430     case VA_RC_CQP:
10431     default:
10432         generic_state->internal_rate_mode = VA_RC_CQP;
10433         break;
10434     }
10435
10436     if (encoder_context->quality_level == 0)
10437         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10438 }
10439
10440 /* allcate resources for pak only (fei mode) */
10441 static VAStatus
10442 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10443                                   VAProfile profile,
10444                                   struct encode_state *encode_state,
10445                                   struct intel_encoder_context *encoder_context)
10446 {
10447     VAStatus va_status;
10448     struct i965_driver_data *i965 = i965_driver_data(ctx);
10449     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10450     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10451     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10452     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10453     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10454     struct gen9_surface_avc *avc_priv_surface;
10455     VAEncPictureParameterBufferH264  *pic_param;
10456     VAEncSliceParameterBufferH264 *slice_param;
10457     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10458     unsigned int size = 0, i, j;
10459     unsigned int frame_mb_nums;
10460     struct object_buffer *obj_buffer = NULL;
10461     struct buffer_store *buffer_store = NULL;
10462     struct object_surface *obj_surface = NULL;
10463     struct avc_surface_param surface_param;
10464     struct i965_coded_buffer_segment *coded_buffer_segment;
10465     dri_bo *bo;
10466     unsigned char * pdata;
10467
10468     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10469
10470     pic_param = avc_state->pic_param;
10471     slice_param = avc_state->slice_param[0];
10472
10473     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10474     if (va_status != VA_STATUS_SUCCESS)
10475         return va_status;
10476
10477     va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10478     if (va_status != VA_STATUS_SUCCESS)
10479         return va_status;
10480
10481     /* Encoded bitstream ?*/
10482     obj_buffer = encode_state->coded_buf_object;
10483     bo = obj_buffer->buffer_store->bo;
10484     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10485     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10486     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10487     generic_ctx->compressed_bitstream.end_offset =
10488         ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
10489
10490     /*status buffer */
10491     dri_bo_unreference(avc_ctx->status_buffer.bo);
10492     avc_ctx->status_buffer.bo = bo;
10493     dri_bo_reference(bo);
10494
10495     /* set the internal flag to 0 to indicate the coded size is unknown */
10496     dri_bo_map(bo, 1);
10497     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10498     coded_buffer_segment->mapped = 0;
10499     coded_buffer_segment->codec = encoder_context->codec;
10500     coded_buffer_segment->status_support = 1;
10501
10502     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10503     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10504     dri_bo_unmap(bo);
10505     //frame id, it is the ref pic id in the reference_objects list.
10506     avc_state->num_refs[0] = 0;
10507     avc_state->num_refs[1] = 0;
10508     if (generic_state->frame_type == SLICE_TYPE_P) {
10509         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10510
10511         if (slice_param->num_ref_idx_active_override_flag)
10512             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10513     } else if (generic_state->frame_type == SLICE_TYPE_B) {
10514         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10515         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10516
10517         if (slice_param->num_ref_idx_active_override_flag) {
10518             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10519             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
10520         }
10521     }
10522     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10523         VAPictureH264 *va_pic;
10524
10525         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10526         avc_state->list_ref_idx[0][i] = 0;
10527
10528         if (i >= avc_state->num_refs[0])
10529             continue;
10530
10531         va_pic = &slice_param->RefPicList0[i];
10532
10533         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10534             obj_surface = encode_state->reference_objects[j];
10535
10536             if (obj_surface &&
10537                 obj_surface->bo &&
10538                 obj_surface->base.id == va_pic->picture_id) {
10539
10540                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10541                 avc_state->list_ref_idx[0][i] = j;
10542
10543                 break;
10544             }
10545         }
10546     }
10547     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10548         VAPictureH264 *va_pic;
10549
10550         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10551         avc_state->list_ref_idx[1][i] = 0;
10552
10553         if (i >= avc_state->num_refs[1])
10554             continue;
10555
10556         va_pic = &slice_param->RefPicList1[i];
10557
10558         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10559             obj_surface = encode_state->reference_objects[j];
10560
10561
10562             if (obj_surface &&
10563                 obj_surface->bo &&
10564                 obj_surface->base.id == va_pic->picture_id) {
10565
10566                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10567                 avc_state->list_ref_idx[1][i] = j;
10568
10569                 break;
10570                 break;
10571             }
10572         }
10573     }
10574
10575     obj_surface = encode_state->reconstructed_object;
10576     fei_param = avc_state->fei_framectl_param;
10577     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10578
10579     /* Setup current reconstruct frame */
10580     obj_surface = encode_state->reconstructed_object;
10581     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10582
10583     if (va_status != VA_STATUS_SUCCESS)
10584         return va_status;
10585
10586     memset(&surface_param, 0, sizeof(surface_param));
10587     surface_param.frame_width = generic_state->frame_width_in_pixel;
10588     surface_param.frame_height = generic_state->frame_height_in_pixel;
10589     va_status = gen9_avc_init_check_surfaces(ctx,
10590                                              obj_surface, encoder_context,
10591                                              &surface_param);
10592     avc_priv_surface = obj_surface->private_data;
10593
10594     /* res_mb_code_surface for MB code */
10595     /* PAK only mode must have the mb_code_surface from middleware,
10596      * so the code shouldn't reach here without an externally provided
10597      * MB Code buffer */
10598     assert(fei_param->mb_code_data != VA_INVALID_ID);
10599     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10600     obj_buffer = BUFFER(fei_param->mb_code_data);
10601     assert(obj_buffer != NULL);
10602     buffer_store = obj_buffer->buffer_store;
10603     assert(size <= buffer_store->bo->size);
10604     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10605         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10606     i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10607                                            buffer_store->bo);
10608     /* res_mv_data_surface for MV data */
10609     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10610     if (fei_param->mv_data != VA_INVALID_ID) {
10611         obj_buffer = BUFFER(fei_param->mv_data);
10612         assert(obj_buffer != NULL);
10613         buffer_store = obj_buffer->buffer_store;
10614         assert(size <= buffer_store->bo->size);
10615         if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10616             i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10617         i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10618                                                buffer_store->bo);
10619     }
10620
10621     return VA_STATUS_SUCCESS;
10622
10623 }
10624
/* Prepare every surface and buffer the AVC PAK (bitstream packing) stage
 * needs for the current frame: the reconstructed surface (bound as pre- or
 * post-deblocking output), the input YUV surface, the reference surfaces
 * with their direct-MV buffers, the second-level slice batch buffer, and
 * the hardware row-store / MB-status scratch buffers.
 *
 * Returns VA_STATUS_SUCCESS, a propagated VA status from a failing helper,
 * or VA_STATUS_ERROR_ALLOCATION_FAILED when a scratch buffer cannot be
 * allocated.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;

    struct gen9_surface_avc *avc_priv_surface;
    struct avc_surface_param surface_param;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size, w_mb, h_mb;

    /* In FEI PAK-only mode the MB code / MV buffers come from the
     * middleware; bind them first. */
    if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
        va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    w_mb = generic_state->frame_width_in_mbs;
    h_mb = generic_state->frame_height_in_mbs;

    /* update the parameter and check slice parameter */
    /* In-loop deblocking is considered enabled as soon as any slice does
     * not fully disable it (disable_deblocking_filter_idc != 1); the scan
     * stops at the first such slice. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
        /* The last two DMV buffer slots are reserved for the current frame
         * (top/bottom fields). */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        /* NOTE(review): at this point slice_param points at the last slice
         * examined by the deblocking scan above (or one past it), not
         * necessarily at slice 0 — confirm the slice_qp_delta used here is
         * the intended one. */
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    /* Rebind the reconstructed surface; depending on whether deblocking is
     * active it is wired up as the post- or pre-deblocking output. */
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Each reference i occupies DMV slots 2*i (top) and 2*i+1 (bottom);
     * the list is assumed to be densely packed, so stop at the first hole. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* Recreate the second-level batch buffer that will hold the per-slice
     * PAK commands (4 KiB per slice parameter buffer). */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* Hardware row-store / status scratch buffers, sized per MB row
     * (or per MB for the status buffer). */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
10822
10823 static VAStatus
10824 gen9_avc_encode_picture(VADriverContextP ctx,
10825                         VAProfile profile,
10826                         struct encode_state *encode_state,
10827                         struct intel_encoder_context *encoder_context)
10828 {
10829     VAStatus va_status;
10830     struct i965_driver_data *i965 = i965_driver_data(ctx);
10831     struct i965_gpe_table *gpe = &i965->gpe_table;
10832     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10833     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
10834     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
10835     struct intel_batchbuffer *batch = encoder_context->base.batch;
10836
10837     va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10838
10839     if (va_status != VA_STATUS_SUCCESS)
10840         return va_status;
10841
10842     if (i965->intel.has_bsd2)
10843         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
10844     else
10845         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
10846     intel_batchbuffer_emit_mi_flush(batch);
10847     for (generic_state->curr_pak_pass = 0;
10848          generic_state->curr_pak_pass < generic_state->num_pak_passes;
10849          generic_state->curr_pak_pass++) {
10850
10851         if (generic_state->curr_pak_pass == 0) {
10852             /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
10853             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
10854             struct encoder_status_buffer_internal *status_buffer;
10855
10856             status_buffer = &(avc_ctx->status_buffer);
10857             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
10858             mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10859             mi_load_reg_imm.data = 0;
10860             gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
10861         }
10862         gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
10863         gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
10864         gen9_avc_read_mfc_status(ctx, encoder_context);
10865     }
10866
10867     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10868         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10869         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10870     }
10871
10872     intel_batchbuffer_end_atomic(batch);
10873     intel_batchbuffer_flush(batch);
10874
10875     generic_state->seq_frame_number++;
10876     generic_state->total_frame_number++;
10877     generic_state->first_frame = 0;
10878     return VA_STATUS_SUCCESS;
10879 }
10880
10881 static VAStatus
10882 gen9_avc_pak_pipeline(VADriverContextP ctx,
10883                       VAProfile profile,
10884                       struct encode_state *encode_state,
10885                       struct intel_encoder_context *encoder_context)
10886 {
10887     VAStatus vaStatus;
10888
10889     switch (profile) {
10890     case VAProfileH264ConstrainedBaseline:
10891     case VAProfileH264Main:
10892     case VAProfileH264High:
10893     case VAProfileH264MultiviewHigh:
10894     case VAProfileH264StereoHigh:
10895         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10896         break;
10897
10898     default:
10899         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
10900         break;
10901     }
10902
10903     return vaStatus;
10904 }
10905
10906 static void
10907 gen9_avc_pak_context_destroy(void * context)
10908 {
10909     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10910     struct generic_encoder_context * generic_ctx;
10911     struct i965_avc_encoder_context * avc_ctx;
10912     int i = 0;
10913
10914     if (!pak_context)
10915         return;
10916
10917     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10918     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10919
10920     // other things
10921     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10922     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10923     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10924     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10925
10926     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10927     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10928     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10929     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10930     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10931
10932     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10933         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10934     }
10935
10936     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10937         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
10938     }
10939
10940     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10941         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10942         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10943     }
10944
10945 }
10946
10947 static VAStatus
10948 gen9_avc_get_coded_status(VADriverContextP ctx,
10949                           struct intel_encoder_context *encoder_context,
10950                           struct i965_coded_buffer_segment *coded_buf_seg)
10951 {
10952     struct encoder_status *avc_encode_status;
10953
10954     if (!encoder_context || !coded_buf_seg)
10955         return VA_STATUS_ERROR_INVALID_BUFFER;
10956
10957     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10958     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10959
10960     return VA_STATUS_SUCCESS;
10961 }
10962
10963 Bool
10964 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10965 {
10966     /* VME & PAK share the same context */
10967     struct i965_driver_data *i965 = i965_driver_data(ctx);
10968     struct encoder_vme_mfc_context * vme_context = NULL;
10969     struct generic_encoder_context * generic_ctx = NULL;
10970     struct i965_avc_encoder_context * avc_ctx = NULL;
10971     struct generic_enc_codec_state * generic_state = NULL;
10972     struct avc_enc_state * avc_state = NULL;
10973     struct encoder_status_buffer_internal *status_buffer;
10974     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10975
10976     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10977     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10978     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10979     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10980     avc_state = calloc(1, sizeof(struct avc_enc_state));
10981
10982     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10983         goto allocate_structure_failed;
10984
10985     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10986     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10987     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10988     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10989     memset(avc_state, 0, sizeof(struct avc_enc_state));
10990
10991     encoder_context->vme_context = vme_context;
10992     vme_context->generic_enc_ctx = generic_ctx;
10993     vme_context->private_enc_ctx = avc_ctx;
10994     vme_context->generic_enc_state = generic_state;
10995     vme_context->private_enc_state = avc_state;
10996
10997     if (IS_SKL(i965->intel.device_info) ||
10998         IS_BXT(i965->intel.device_info)) {
10999         if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11000             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11001             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11002         } else {
11003             /* FEI and PreEnc operation kernels are included in
11004             * the monolithic kernel binary */
11005             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11006             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11007         }
11008     } else if (IS_GEN8(i965->intel.device_info)) {
11009         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11010         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11011     } else if (IS_KBL(i965->intel.device_info) ||
11012                IS_GLK(i965->intel.device_info)) {
11013         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11014         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11015     } else if (IS_GEN10(i965->intel.device_info)) {
11016         generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11017         generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
11018     } else
11019         goto allocate_structure_failed;
11020
11021     /* initialize misc ? */
11022     avc_ctx->ctx = ctx;
11023     generic_ctx->use_hw_scoreboard = 1;
11024     generic_ctx->use_hw_non_stalling_scoreboard = 1;
11025
11026     /* initialize generic state */
11027
11028     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11029     generic_state->preset = INTEL_PRESET_RT_SPEED;
11030     generic_state->seq_frame_number = 0;
11031     generic_state->total_frame_number = 0;
11032     generic_state->frame_type = 0;
11033     generic_state->first_frame = 1;
11034
11035     generic_state->frame_width_in_pixel = 0;
11036     generic_state->frame_height_in_pixel = 0;
11037     generic_state->frame_width_in_mbs = 0;
11038     generic_state->frame_height_in_mbs = 0;
11039     generic_state->frame_width_4x = 0;
11040     generic_state->frame_height_4x = 0;
11041     generic_state->frame_width_16x = 0;
11042     generic_state->frame_height_16x = 0;
11043     generic_state->frame_width_32x = 0;
11044     generic_state->downscaled_width_4x_in_mb = 0;
11045     generic_state->downscaled_height_4x_in_mb = 0;
11046     generic_state->downscaled_width_16x_in_mb = 0;
11047     generic_state->downscaled_height_16x_in_mb = 0;
11048     generic_state->downscaled_width_32x_in_mb = 0;
11049     generic_state->downscaled_height_32x_in_mb = 0;
11050
11051     generic_state->hme_supported = 1;
11052     generic_state->b16xme_supported = 1;
11053     generic_state->b32xme_supported = 0;
11054     generic_state->hme_enabled = 0;
11055     generic_state->b16xme_enabled = 0;
11056     generic_state->b32xme_enabled = 0;
11057
11058     if (encoder_context->fei_enabled) {
11059         /* Disabling HME in FEI encode */
11060         generic_state->hme_supported = 0;
11061         generic_state->b16xme_supported = 0;
11062     } else if (encoder_context->preenc_enabled) {
11063         /* Disabling 16x16ME in PreEnc */
11064         generic_state->b16xme_supported = 0;
11065     }
11066
11067     generic_state->brc_distortion_buffer_supported = 1;
11068     generic_state->brc_constant_buffer_supported = 0;
11069
11070     generic_state->frame_rate = 30;
11071     generic_state->brc_allocated = 0;
11072     generic_state->brc_inited = 0;
11073     generic_state->brc_need_reset = 0;
11074     generic_state->is_low_delay = 0;
11075     generic_state->brc_enabled = 0;//default
11076     generic_state->internal_rate_mode = 0;
11077     generic_state->curr_pak_pass = 0;
11078     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11079     generic_state->is_first_pass = 1;
11080     generic_state->is_last_pass = 0;
11081     generic_state->mb_brc_enabled = 0; // enable mb brc
11082     generic_state->brc_roi_enable = 0;
11083     generic_state->brc_dirty_roi_enable = 0;
11084     generic_state->skip_frame_enbale = 0;
11085
11086     generic_state->target_bit_rate = 0;
11087     generic_state->max_bit_rate = 0;
11088     generic_state->min_bit_rate = 0;
11089     generic_state->init_vbv_buffer_fullness_in_bit = 0;
11090     generic_state->vbv_buffer_size_in_bit = 0;
11091     generic_state->frames_per_100s = 0;
11092     generic_state->gop_size = 0;
11093     generic_state->gop_ref_distance = 0;
11094     generic_state->brc_target_size = 0;
11095     generic_state->brc_mode = 0;
11096     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11097     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11098     generic_state->brc_init_reset_buf_size_in_bits = 0;
11099     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11100     generic_state->frames_per_window_size = 0;//default
11101     generic_state->target_percentage = 0;
11102
11103     generic_state->avbr_curracy = 0;
11104     generic_state->avbr_convergence = 0;
11105
11106     generic_state->num_skip_frames = 0;
11107     generic_state->size_skip_frames = 0;
11108
11109     generic_state->num_roi = 0;
11110     generic_state->max_delta_qp = 0;
11111     generic_state->min_delta_qp = 0;
11112
11113     if (encoder_context->rate_control_mode != VA_RC_NONE &&
11114         encoder_context->rate_control_mode != VA_RC_CQP) {
11115         generic_state->brc_enabled = 1;
11116         generic_state->brc_distortion_buffer_supported = 1;
11117         generic_state->brc_constant_buffer_supported = 1;
11118         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11119     }
11120     /*avc state initialization */
11121     avc_state->mad_enable = 0;
11122     avc_state->mb_disable_skip_map_enable = 0;
11123     avc_state->sfd_enable = 1;//default
11124     avc_state->sfd_mb_enable = 1;//set it true
11125     avc_state->adaptive_search_window_enable = 1;//default
11126     avc_state->mb_qp_data_enable = 0;
11127     avc_state->intra_refresh_i_enable = 0;
11128     avc_state->min_max_qp_enable = 0;
11129     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
11130
11131     //external input
11132     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11133     avc_state->ftq_skip_threshold_lut_input_enable = 0;
11134     avc_state->ftq_override = 0;
11135
11136     avc_state->direct_bias_adjustment_enable = 0;
11137     avc_state->global_motion_bias_adjustment_enable = 0;
11138     avc_state->disable_sub_mb_partion = 0;
11139     avc_state->arbitrary_num_mbs_in_slice = 0;
11140     avc_state->adaptive_transform_decision_enable = 0;//default
11141     avc_state->skip_check_disable = 0;
11142     avc_state->tq_enable = 0;
11143     avc_state->enable_avc_ildb = 0;
11144     avc_state->mbaff_flag = 0;
11145     avc_state->enable_force_skip = 1;//default
11146     avc_state->rc_panic_enable = 1;//default
11147     avc_state->suppress_recon_enable = 1;//default
11148
11149     avc_state->ref_pic_select_list_supported = 1;
11150     avc_state->mb_brc_supported = 1;//?,default
11151     avc_state->multi_pre_enable = 1;//default
11152     avc_state->ftq_enable = 1;//default
11153     avc_state->caf_supported = 1; //default
11154     avc_state->caf_enable = 0;
11155     avc_state->caf_disable_hd = 1;//default
11156     avc_state->skip_bias_adjustment_supported = 1;//default
11157
11158     avc_state->adaptive_intra_scaling_enable = 1;//default
11159     avc_state->old_mode_cost_enable = 0;//default
11160     avc_state->multi_ref_qp_enable = 1;//default
11161     avc_state->weighted_ref_l0_enable = 1;//default
11162     avc_state->weighted_ref_l1_enable = 1;//default
11163     avc_state->weighted_prediction_supported = 0;
11164     avc_state->brc_split_enable = 0;
11165     avc_state->slice_level_report_supported = 0;
11166
11167     avc_state->fbr_bypass_enable = 1;//default
11168     avc_state->field_scaling_output_interleaved = 0;
11169     avc_state->mb_variance_output_enable = 0;
11170     avc_state->mb_pixel_average_output_enable = 0;
11171     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11172     avc_state->mbenc_curbe_set_in_brc_update = 0;
11173     avc_state->rounding_inter_enable = 1; //default
11174     avc_state->adaptive_rounding_inter_enable = 1;//default
11175
11176     avc_state->mbenc_i_frame_dist_in_use = 0;
11177     avc_state->mb_status_supported = 1; //set in intialization for gen9
11178     avc_state->mb_status_enable = 0;
11179     avc_state->mb_vproc_stats_enable = 0;
11180     avc_state->flatness_check_enable = 0;
11181     avc_state->flatness_check_supported = 1;//default
11182     avc_state->block_based_skip_enable = 0;
11183     avc_state->use_widi_mbenc_kernel = 0;
11184     avc_state->kernel_trellis_enable = 0;
11185     avc_state->generic_reserved = 0;
11186
11187     avc_state->rounding_value = 0;
11188     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11189     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11190     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11191     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11192     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11193     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11194     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11195     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11196     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11197
11198     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11199     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11200     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11201
11202     avc_state->intra_refresh_qp_threshold = 0;
11203     avc_state->trellis_flag = 0;
11204     avc_state->hme_mv_cost_scaling_factor = 0;
11205     avc_state->slice_height = 1;
11206     avc_state->slice_num = 1;
11207     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11208     avc_state->bi_weight = 0;
11209
11210     avc_state->lambda_table_enable = 0;
11211
11212     if (IS_GEN8(i965->intel.device_info)) {
11213         avc_state->brc_const_data_surface_width = 64;
11214         avc_state->brc_const_data_surface_height = 44;
11215         avc_state->mb_status_supported = 0;
11216     } else if (IS_SKL(i965->intel.device_info) ||
11217                IS_BXT(i965->intel.device_info)) {
11218         avc_state->brc_const_data_surface_width = 64;
11219         avc_state->brc_const_data_surface_height = 44;
11220         avc_state->brc_split_enable = 1;
11221     } else if (IS_KBL(i965->intel.device_info) ||
11222                IS_GEN10(i965->intel.device_info) ||
11223                IS_GLK(i965->intel.device_info)) {
11224         avc_state->brc_const_data_surface_width = 64;
11225         avc_state->brc_const_data_surface_height = 53;
11226         //gen95
11227         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11228         avc_state->extended_mv_cost_range_enable = 0;
11229         avc_state->reserved_g95 = 0;
11230         avc_state->mbenc_brc_buffer_size = 128;
11231         avc_state->kernel_trellis_enable = 1;
11232         avc_state->lambda_table_enable = 1;
11233         avc_state->brc_split_enable = 1;
11234         avc_state->adaptive_transform_decision_enable = 1;// CNL
11235     }
11236
11237     avc_state->num_refs[0] = 0;
11238     avc_state->num_refs[1] = 0;
11239     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11240     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11241     avc_state->tq_rounding = 0;
11242     avc_state->zero_mv_threshold = 0;
11243     avc_state->slice_second_levle_batch_buffer_in_use = 0;
11244
11245     //1. seq/pic/slice
11246
11247     /* the definition of status buffer offset for Encoder */
11248
11249     status_buffer = &avc_ctx->status_buffer;
11250     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
11251
11252     status_buffer->base_offset = base_offset;
11253     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11254     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11255     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11256     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11257     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11258     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
11259
11260     status_buffer->status_buffer_size = sizeof(struct encoder_status);
11261     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11262     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11263     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11264     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11265     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
11266
11267     if (IS_GEN8(i965->intel.device_info)) {
11268         gen8_avc_kernel_init(ctx, encoder_context);
11269     } else {
11270         gen9_avc_kernel_init(ctx, encoder_context);
11271     }
11272     encoder_context->vme_context = vme_context;
11273     /* Handling PreEnc operations separately since it gives better
11274      * code readability, avoid possible vme operations mess-up */
11275     encoder_context->vme_pipeline =
11276         !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11277     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
11278
11279     return true;
11280
11281 allocate_structure_failed:
11282
11283     free(vme_context);
11284     free(generic_ctx);
11285     free(avc_ctx);
11286     free(generic_state);
11287     free(avc_state);
11288     return false;
11289 }
11290
11291 Bool
11292 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11293 {
11294     /* VME & PAK share the same context */
11295     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11296
11297     if (!pak_context)
11298         return false;
11299
11300     encoder_context->mfc_context = pak_context;
11301     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11302     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11303     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11304     encoder_context->get_status = gen9_avc_get_coded_status;
11305     return true;
11306 }