OSDN Git Service

Remove self-assignment
[android-x86/hardware-intel-common-vaapi.git] / src / i965_avc_encoder.c
1 /*
2  * Copyright (c) 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *    Sreerenj Balachandran <sreerenj.balachandran@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
47
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
54
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1
#define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* 4 means 16 (= 1 << 4) */

/* Emit a 64-bit buffer address (or two zero dwords when no bo is bound). */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/* Same as OUT_BUFFER_2DW plus a trailing attribute dword. */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)

/* FEI specific buffer sizes per MB in bytes for gen9 */
#define FEI_AVC_MB_CODE_BUFFER_SIZE      64
#define FEI_AVC_MV_DATA_BUFFER_SIZE      128
#define FEI_AVC_MB_CONTROL_BUFFER_SIZE   16
#define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
#define FEI_AVC_DISTORTION_BUFFER_SIZE   48
#define FEI_AVC_QP_BUFFER_SIZE           1
#define PREENC_AVC_STATISTICS_BUFFER_SIZE 64

#define SCALE_CUR_PIC        1
#define SCALE_PAST_REF_PIC   2
#define SCALE_FUTURE_REF_PIC 3
90
/* Flat 4x4/8x8 quantizer matrix: every weight is 16, packed 4 bytes/dword. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
97
/* Flat forward quantizer matrix: 0x1000 per entry, packed two 16-bit
 * values per dword. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000
};
108
/* Maps slice type index to the kernel selector value. */
static const unsigned int slice_type_kernel[3] = { [0] = 1, [1] = 2, [2] = 0 };
110
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
112     // unsigned int 0
113     {
114         0
115     },
116
117     // unsigned int 1
118     {
119         0
120     },
121
122     // unsigned int 2
123     {
124         0
125     },
126
127     // unsigned int 3
128     {
129         0
130     },
131
132     // unsigned int 4
133     {
134         0
135     },
136
137     // unsigned int 5
138     {
139         0
140     },
141
142     // unsigned int 6
143     {
144         0
145     },
146
147     // unsigned int 7
148     {
149         0
150     },
151
152     // unsigned int 8
153     {
154         0,
155         0
156     },
157
158     // unsigned int 9
159     {
160         0,
161         0
162     },
163
164     // unsigned int 10
165     {
166         0,
167         0
168     },
169
170     // unsigned int 11
171     {
172         0,
173         1
174     },
175
176     // unsigned int 12
177     {
178         51,
179         0
180     },
181
182     // unsigned int 13
183     {
184         40,
185         60,
186         80,
187         120
188     },
189
190     // unsigned int 14
191     {
192         35,
193         60,
194         80,
195         120
196     },
197
198     // unsigned int 15
199     {
200         40,
201         60,
202         90,
203         115
204     },
205
206     // unsigned int 16
207     {
208         0,
209         0,
210         0,
211         0
212     },
213
214     // unsigned int 17
215     {
216         0,
217         0,
218         0,
219         0
220     },
221
222     // unsigned int 18
223     {
224         0,
225         0,
226         0,
227         0
228     },
229
230     // unsigned int 19
231     {
232         0,
233         0,
234         0,
235         0
236     },
237
238     // unsigned int 20
239     {
240         0,
241         0,
242         0,
243         0
244     },
245
246     // unsigned int 21
247     {
248         0,
249         0,
250         0,
251         0
252     },
253
254     // unsigned int 22
255     {
256         0,
257         0,
258         0,
259         0
260     },
261
262     // unsigned int 23
263     {
264         0
265     }
266 };
267
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
269     //unsigned int 0
270     {
271         0
272     },
273
274     //unsigned int 1
275     {
276         0
277     },
278
279     //unsigned int 2
280     {
281         0
282     },
283
284     //unsigned int 3
285     {
286
287         10,
288         50
289
290     },
291
292     //unsigned int 4
293     {
294
295         100,
296         150
297
298     },
299
300     //unsigned int 5
301     {
302         0, 0, 0, 0
303     },
304
305     //unsigned int 6
306     {
307         0, 0, 0, 0
308     },
309
310     //unsigned int 7
311     {
312         0
313     },
314
315     //unsigned int 8
316     {
317
318         1,
319         1,
320         3,
321         2
322
323     },
324
325     //unsigned int 9
326     {
327
328         1,
329         40,
330         5,
331         5
332
333     },
334
335     //unsigned int 10
336     {
337
338         3,
339         1,
340         7,
341         18
342
343     },
344
345     //unsigned int 11
346     {
347
348         25,
349         37,
350         40,
351         75
352
353     },
354
355     //unsigned int 12
356     {
357
358         97,
359         103,
360         125,
361         160
362
363     },
364
365     //unsigned int 13
366     {
367
368         -3,
369         -2,
370         -1,
371         0
372
373     },
374
375     //unsigned int 14
376     {
377
378         1,
379         2,
380         3,
381         0xff
382
383     },
384
385     //unsigned int 15
386     {
387         0, 0
388     },
389
390     //unsigned int 16
391     {
392         0, 0
393     },
394
395     //unsigned int 17
396     {
397         0, 0
398     },
399 };
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
401     // unsigned int 0
402     {
403         0
404     },
405
406     // unsigned int 1
407     {
408         0
409     },
410
411     // unsigned int 2
412     {
413         0
414     },
415
416     // unsigned int 3
417     {
418         10,
419         50
420     },
421
422     // unsigned int 4
423     {
424         100,
425         150
426     },
427
428     // unsigned int 5
429     {
430         0,
431         0,
432         0,
433         0
434     },
435
436     // unsigned int 6
437     {
438         0,
439         0,
440         0,
441         0,
442         0,
443         0
444     },
445
446     // unsigned int 7
447     {
448         0
449     },
450
451     // unsigned int 8
452     {
453         1,
454         1,
455         3,
456         2
457     },
458
459     // unsigned int 9
460     {
461         1,
462         40,
463         5,
464         5
465     },
466
467     // unsigned int 10
468     {
469         3,
470         1,
471         7,
472         18
473     },
474
475     // unsigned int 11
476     {
477         25,
478         37,
479         40,
480         75
481     },
482
483     // unsigned int 12
484     {
485         97,
486         103,
487         125,
488         160
489     },
490
491     // unsigned int 13
492     {
493         -3,
494         -2,
495         -1,
496         0
497     },
498
499     // unsigned int 14
500     {
501         1,
502         2,
503         3,
504         0xff
505     },
506
507     // unsigned int 15
508     {
509         0,
510         0,
511         0,
512         0
513     },
514
515     // unsigned int 16
516     {
517         0
518     },
519
520     // unsigned int 17
521     {
522         0
523     },
524
525     // unsigned int 18
526     {
527         0
528     },
529
530     // unsigned int 19
531     {
532         0
533     },
534
535     // unsigned int 20
536     {
537         0
538     },
539
540     // unsigned int 21
541     {
542         0
543     },
544
545     // unsigned int 22
546     {
547         0
548     },
549
550     // unsigned int 23
551     {
552         0
553     },
554
555 };
556
557 static void
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559                                 struct encode_state *encode_state,
560                                 struct intel_encoder_context *encoder_context)
561 {
562     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
564     int i;
565
566     /* brc */
567     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
568
569     generic_state->brc_need_reset = encoder_context->brc.need_reset;
570
571     if (generic_state->internal_rate_mode == VA_RC_CBR) {
572         generic_state->min_bit_rate = generic_state->max_bit_rate;
573         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
574
575         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576             generic_state->target_bit_rate = generic_state->max_bit_rate;
577             generic_state->brc_need_reset = 1;
578         }
579     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
582
583         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585             generic_state->brc_need_reset = 1;
586         }
587     }
588
589     /*  frame rate */
590     if (generic_state->internal_rate_mode != VA_RC_CQP) {
591         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
594     } else {
595         generic_state->frames_per_100s = 30 * 100;
596         generic_state->frame_rate = 30 ;
597         generic_state->frames_per_window_size = 30;
598     }
599
600     /*  HRD */
601     if (generic_state->internal_rate_mode != VA_RC_CQP) {
602         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
604     }
605
606     /* ROI */
607     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608     if (generic_state->num_roi > 0) {
609         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
611
612         for (i = 0; i < generic_state->num_roi; i++) {
613             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
614             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
615             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
616             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
618
619             generic_state->roi[i].left /= 16;
620             generic_state->roi[i].right /= 16;
621             generic_state->roi[i].top /= 16;
622             generic_state->roi[i].bottom /= 16;
623         }
624     }
625
626 }
627
628 static bool
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
630                                      int binary_size,
631                                      INTEL_GENERIC_ENC_OPERATION operation,
632                                      int krnstate_idx,
633                                      struct i965_kernel *ret_kernel)
634 {
635     typedef uint32_t BIN_PTR[4];
636
637     char *bin_start;
638     gen9_avc_encoder_kernel_header      *pkh_table;
639     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
640     int next_krnoffset;
641
642     if (!pvbinary || !ret_kernel)
643         return false;
644
645     bin_start = (char *)pvbinary;
646     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647     pinvalid_entry = &(pkh_table->static_detection) + 1;
648     next_krnoffset = binary_size;
649
650     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651         pcurr_header = &pkh_table->ply_dscale_ply;
652     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653         pcurr_header = &pkh_table->ply_2xdscale_ply;
654     } else if (operation == INTEL_GENERIC_ENC_ME) {
655         pcurr_header = &pkh_table->me_p;
656     } else if (operation == INTEL_GENERIC_ENC_BRC) {
657         pcurr_header = &pkh_table->frame_brc_init;
658     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659         pcurr_header = &pkh_table->mbenc_quality_I;
660     } else if (operation == INTEL_GENERIC_ENC_WP) {
661         pcurr_header = &pkh_table->wp;
662     } else if (operation == INTEL_GENERIC_ENC_SFD) {
663         pcurr_header = &pkh_table->static_detection;
664     } else {
665         return false;
666     }
667
668     pcurr_header += krnstate_idx;
669     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
670
671     pnext_header = (pcurr_header + 1);
672     if (pnext_header < pinvalid_entry) {
673         next_krnoffset = pnext_header->kernel_start_pointer << 6;
674     }
675     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
676
677     return true;
678 }
679
680 static bool
681 intel_avc_fei_get_kernel_header_and_size(
682     void                             *pvbinary,
683     int                              binary_size,
684     INTEL_GENERIC_ENC_OPERATION      operation,
685     int                              krnstate_idx,
686     struct i965_kernel               *ret_kernel)
687 {
688     typedef uint32_t BIN_PTR[4];
689
690     char *bin_start;
691     gen9_avc_fei_encoder_kernel_header      *pkh_table;
692     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
693     int next_krnoffset;
694
695     if (!pvbinary || !ret_kernel)
696         return false;
697
698     bin_start = (char *)pvbinary;
699     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700     pinvalid_entry = &(pkh_table->wp) + 1;
701     next_krnoffset = binary_size;
702
703     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704         pcurr_header = &pkh_table->ply_dscale_ply;
705     } else if (operation == INTEL_GENERIC_ENC_ME) {
706         pcurr_header = &pkh_table->me_p;
707     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708         pcurr_header = &pkh_table->mbenc_i;
709     } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710         pcurr_header =  &pkh_table->preproc;
711     } else {
712         return false;
713     }
714
715     pcurr_header += krnstate_idx;
716     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
717
718     pnext_header = (pcurr_header + 1);
719     if (pnext_header < pinvalid_entry) {
720         next_krnoffset = pnext_header->kernel_start_pointer << 6;
721     }
722     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
723
724     return true;
725 }
726
727 static void
728 gen9_free_surfaces_avc(void **data)
729 {
730     struct gen9_surface_avc *avc_surface;
731
732     if (!data || !*data)
733         return;
734
735     avc_surface = *data;
736
737     if (avc_surface->scaled_4x_surface_obj) {
738         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740         avc_surface->scaled_4x_surface_obj = NULL;
741     }
742
743     if (avc_surface->scaled_16x_surface_obj) {
744         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746         avc_surface->scaled_16x_surface_obj = NULL;
747     }
748
749     if (avc_surface->scaled_32x_surface_obj) {
750         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752         avc_surface->scaled_32x_surface_obj = NULL;
753     }
754
755     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
758
759     /* FEI specific resources */
760     /* since the driver previously taken an extra reference to the drm_bo
761      * in case the buffers were supplied by middleware, there shouldn't
762      * be any memory handling issue */
763     i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764     i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765     i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766     i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
767
768     dri_bo_unreference(avc_surface->dmv_top);
769     avc_surface->dmv_top = NULL;
770     dri_bo_unreference(avc_surface->dmv_bottom);
771     avc_surface->dmv_bottom = NULL;
772
773     free(avc_surface);
774
775     *data = NULL;
776
777     return;
778 }
779
780 static VAStatus
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782                              struct object_surface *obj_surface,
783                              struct intel_encoder_context *encoder_context,
784                              struct avc_surface_param *surface_param)
785 {
786     struct i965_driver_data *i965 = i965_driver_data(ctx);
787     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
790
791     struct gen9_surface_avc *avc_surface;
792     int downscaled_width_4x, downscaled_height_4x;
793     int downscaled_width_16x, downscaled_height_16x;
794     int downscaled_width_32x, downscaled_height_32x;
795     int size = 0;
796     unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797     unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798     unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799     int allocate_flag = 1;
800     int width, height;
801
802     if (!obj_surface || !obj_surface->bo)
803         return VA_STATUS_ERROR_INVALID_SURFACE;
804
805     if (obj_surface->private_data) {
806         return VA_STATUS_SUCCESS;
807     }
808
809     avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
810
811     if (!avc_surface)
812         return VA_STATUS_ERROR_ALLOCATION_FAILED;
813
814     avc_surface->ctx = ctx;
815     obj_surface->private_data = avc_surface;
816     obj_surface->free_private_data = gen9_free_surfaces_avc;
817
818     downscaled_width_4x = generic_state->frame_width_4x;
819     downscaled_height_4x = generic_state->frame_height_4x;
820
821     i965_CreateSurfaces(ctx,
822                         downscaled_width_4x,
823                         downscaled_height_4x,
824                         VA_RT_FORMAT_YUV420,
825                         1,
826                         &avc_surface->scaled_4x_surface_id);
827
828     avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
829
830     if (!avc_surface->scaled_4x_surface_obj) {
831         return VA_STATUS_ERROR_ALLOCATION_FAILED;
832     }
833
834     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
836
837     downscaled_width_16x = generic_state->frame_width_16x;
838     downscaled_height_16x = generic_state->frame_height_16x;
839     i965_CreateSurfaces(ctx,
840                         downscaled_width_16x,
841                         downscaled_height_16x,
842                         VA_RT_FORMAT_YUV420,
843                         1,
844                         &avc_surface->scaled_16x_surface_id);
845     avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
846
847     if (!avc_surface->scaled_16x_surface_obj) {
848         return VA_STATUS_ERROR_ALLOCATION_FAILED;
849     }
850
851     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
853
854     if (generic_state->b32xme_supported ||
855         generic_state->b32xme_enabled) {
856         downscaled_width_32x = generic_state->frame_width_32x;
857         downscaled_height_32x = generic_state->frame_height_32x;
858         i965_CreateSurfaces(ctx,
859                             downscaled_width_32x,
860                             downscaled_height_32x,
861                             VA_RT_FORMAT_YUV420,
862                             1,
863                             &avc_surface->scaled_32x_surface_id);
864         avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
865
866         if (!avc_surface->scaled_32x_surface_obj) {
867             return VA_STATUS_ERROR_ALLOCATION_FAILED;
868         }
869
870         i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
872     }
873
874     /*mb code and mv data for each frame*/
875     if (!encoder_context->fei_enabled) {
876         size = frame_mb_nums * 16 * 4;
877         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878                                                    &avc_surface->res_mb_code_surface,
879                                                    ALIGN(size, 0x1000),
880                                                    "mb code buffer");
881         if (!allocate_flag)
882             goto failed_allocation;
883
884         size = frame_mb_nums * 32 * 4;
885         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886                                                    &avc_surface->res_mv_data_surface,
887                                                    ALIGN(size, 0x1000),
888                                                    "mv data buffer");
889         if (!allocate_flag)
890             goto failed_allocation;
891     }
892
893     /* ref pic list*/
894     if (avc_state->ref_pic_select_list_supported) {
895         width = ALIGN(frame_width_in_mbs * 8, 64);
896         height = frame_height_in_mbs ;
897         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898                                                       &avc_surface->res_ref_pic_select_surface,
899                                                       width, height,
900                                                       width,
901                                                       "Ref pic select list buffer");
902         if (!allocate_flag)
903             goto failed_allocation;
904     }
905
906     /*direct mv*/
907     avc_surface->dmv_top =
908         dri_bo_alloc(i965->intel.bufmgr,
909                      "direct mv top Buffer",
910                      68 * frame_mb_nums,
911                      64);
912     avc_surface->dmv_bottom =
913         dri_bo_alloc(i965->intel.bufmgr,
914                      "direct mv bottom Buffer",
915                      68 * frame_mb_nums,
916                      64);
917     assert(avc_surface->dmv_top);
918     assert(avc_surface->dmv_bottom);
919
920     return VA_STATUS_SUCCESS;
921
922 failed_allocation:
923     return VA_STATUS_ERROR_ALLOCATION_FAILED;
924 }
925
926 static void
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928                             struct encode_state *encode_state,
929                             struct intel_encoder_context *encoder_context)
930 {
931     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
935
936     struct i965_gpe_resource *gpe_resource = NULL;
937     VAEncSliceParameterBufferH264 * slice_param = NULL;
938     unsigned int * data = NULL;
939     unsigned int * data_row = NULL;
940     int i, j, count = 0;
941     unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
942
943     if (!avc_state->arbitrary_num_mbs_in_slice)
944         return;
945
946     gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947     assert(gpe_resource);
948
949     i965_zero_gpe_resource(gpe_resource);
950
951     data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
952     assert(data_row);
953
954     data = data_row;
955     for (i = 0; i < avc_state->slice_num; i++) {
956         slice_param = avc_state->slice_param[i];
957         for (j = 0; j < slice_param->num_macroblocks; j++) {
958             *data++ = i;
959             if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
960                 data_row += pitch;
961                 data = data_row;
962                 *data++ = i;
963             }
964             count++;
965         }
966     }
967     *data++ = 0xFFFFFFFF;
968
969     i965_unmap_gpe_resource(gpe_resource);
970 }
971
972 static VAStatus
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974                             struct encode_state *encode_state,
975                             struct intel_encoder_context *encoder_context)
976 {
977     struct i965_driver_data *i965 = i965_driver_data(ctx);
978     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982     unsigned int size  = 0;
983     unsigned int width  = 0;
984     unsigned int height  = 0;
985     unsigned char * data  = NULL;
986     int allocate_flag = 1;
987     int i = 0;
988
989     /*all the surface/buffer are allocated here*/
990
991     /*second level batch buffer for image state write when cqp etc*/
992     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
996                                                ALIGN(size, 0x1000),
997                                                "second levle batch (image state write) buffer");
998     if (!allocate_flag)
999         goto failed_allocation;
1000
1001     /* scaling related surface   */
1002     if (avc_state->mb_status_supported) {
1003         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006                                                    &avc_ctx->res_mb_status_buffer,
1007                                                    ALIGN(size, 0x1000),
1008                                                    "MB statistics output buffer");
1009         if (!allocate_flag)
1010             goto failed_allocation;
1011         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1012     }
1013
1014     if (avc_state->flatness_check_supported) {
1015         width = generic_state->frame_width_in_mbs * 4;
1016         height = generic_state->frame_height_in_mbs * 4;
1017         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019                                                       &avc_ctx->res_flatness_check_surface,
1020                                                       width, height,
1021                                                       ALIGN(width, 64),
1022                                                       "Flatness check buffer");
1023         if (!allocate_flag)
1024             goto failed_allocation;
1025     }
1026     /* me related surface */
1027     width = generic_state->downscaled_width_4x_in_mb * 8;
1028     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031                                                   &avc_ctx->s4x_memv_distortion_buffer,
1032                                                   width, height,
1033                                                   ALIGN(width, 64),
1034                                                   "4x MEMV distortion buffer");
1035     if (!allocate_flag)
1036         goto failed_allocation;
1037     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1038
1039     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1044                                                   width, height,
1045                                                   width,
1046                                                   "4x MEMV min distortion brc buffer");
1047     if (!allocate_flag)
1048         goto failed_allocation;
1049     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1050
1051
1052     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056                                                   &avc_ctx->s4x_memv_data_buffer,
1057                                                   width, height,
1058                                                   width,
1059                                                   "4x MEMV data buffer");
1060     if (!allocate_flag)
1061         goto failed_allocation;
1062     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1063
1064
1065     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069                                                   &avc_ctx->s16x_memv_data_buffer,
1070                                                   width, height,
1071                                                   width,
1072                                                   "16x MEMV data buffer");
1073     if (!allocate_flag)
1074         goto failed_allocation;
1075     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1076
1077
1078     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                                   &avc_ctx->s32x_memv_data_buffer,
1083                                                   width, height,
1084                                                   width,
1085                                                   "32x MEMV data buffer");
1086     if (!allocate_flag)
1087         goto failed_allocation;
1088     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1089
1090
1091     if (!generic_state->brc_allocated) {
1092         /*brc related surface */
1093         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1094         size = 864;
1095         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096                                                    &avc_ctx->res_brc_history_buffer,
1097                                                    ALIGN(size, 0x1000),
1098                                                    "brc history buffer");
1099         if (!allocate_flag)
1100             goto failed_allocation;
1101
1102         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1103         size = 64;//44
1104         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106                                                    ALIGN(size, 0x1000),
1107                                                    "brc pak statistic buffer");
1108         if (!allocate_flag)
1109             goto failed_allocation;
1110
1111         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114                                                    &avc_ctx->res_brc_image_state_read_buffer,
1115                                                    ALIGN(size, 0x1000),
1116                                                    "brc image state read buffer");
1117         if (!allocate_flag)
1118             goto failed_allocation;
1119
1120         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123                                                    &avc_ctx->res_brc_image_state_write_buffer,
1124                                                    ALIGN(size, 0x1000),
1125                                                    "brc image state write buffer");
1126         if (!allocate_flag)
1127             goto failed_allocation;
1128
1129         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130         height = avc_state->brc_const_data_surface_height;
1131         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133                                                       &avc_ctx->res_brc_const_data_buffer,
1134                                                       width, height,
1135                                                       width,
1136                                                       "brc const data buffer");
1137         if (!allocate_flag)
1138             goto failed_allocation;
1139         i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140
1141         if (generic_state->brc_distortion_buffer_supported) {
1142             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                           &avc_ctx->res_brc_dist_data_surface,
1149                                                           width, height,
1150                                                           width,
1151                                                           "brc dist data buffer");
1152             if (!allocate_flag)
1153                 goto failed_allocation;
1154             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1155         }
1156
1157         if (generic_state->brc_roi_enable) {
1158             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162                                                           &avc_ctx->res_mbbrc_roi_surface,
1163                                                           width, height,
1164                                                           width,
1165                                                           "mbbrc roi buffer");
1166             if (!allocate_flag)
1167                 goto failed_allocation;
1168             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1169         }
1170
1171         /*mb qp in mb brc*/
1172         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
1177                                                       width, height,
1178                                                       width,
1179                                                       "mbbrc mb qp buffer");
1180         if (!allocate_flag)
1181             goto failed_allocation;
1182
1183         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184         size = 16 * AVC_QP_MAX * 4;
1185         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186                                                    &avc_ctx->res_mbbrc_const_data_buffer,
1187                                                    ALIGN(size, 0x1000),
1188                                                    "mbbrc const data buffer");
1189         if (!allocate_flag)
1190             goto failed_allocation;
1191
1192         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194             size = avc_state->mbenc_brc_buffer_size;
1195             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196                                                        &avc_ctx->res_mbenc_brc_buffer,
1197                                                        ALIGN(size, 0x1000),
1198                                                        "mbenc brc buffer");
1199             if (!allocate_flag)
1200                 goto failed_allocation;
1201             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1202         }
1203         generic_state->brc_allocated = 1;
1204     }
1205
1206     /*mb qp external*/
1207     if (avc_state->mb_qp_data_enable) {
1208         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212                                                       &avc_ctx->res_mb_qp_data_surface,
1213                                                       width, height,
1214                                                       width,
1215                                                       "external mb qp buffer");
1216         if (!allocate_flag)
1217             goto failed_allocation;
1218     }
1219
1220     /*     mbenc related surface. it share most of surface with other kernels     */
1221     if (avc_state->arbitrary_num_mbs_in_slice) {
1222         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223         height = generic_state->frame_height_in_mbs ;
1224         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226                                                       &avc_ctx->res_mbenc_slice_map_surface,
1227                                                       width, height,
1228                                                       width,
1229                                                       "slice map buffer");
1230         if (!allocate_flag)
1231             goto failed_allocation;
1232         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1233
1234         /*generate slice map,default one slice per frame.*/
1235     }
1236
1237     /* sfd related surface  */
1238     if (avc_state->sfd_enable) {
1239         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1240         size = 128;
1241         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242                                                    &avc_ctx->res_sfd_output_buffer,
1243                                                    size,
1244                                                    "sfd output buffer");
1245         if (!allocate_flag)
1246             goto failed_allocation;
1247         i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1248
1249         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250         size = ALIGN(52, 64);
1251         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1253                                                    size,
1254                                                    "sfd P frame cost table buffer");
1255         if (!allocate_flag)
1256             goto failed_allocation;
1257         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1258         assert(data);
1259         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1261
1262         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263         size = ALIGN(52, 64);
1264         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1266                                                    size,
1267                                                    "sfd B frame cost table buffer");
1268         if (!allocate_flag)
1269             goto failed_allocation;
1270         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1271         assert(data);
1272         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1274     }
1275
1276     /* wp related surfaces */
1277     if (avc_state->weighted_prediction_supported) {
1278         for (i = 0; i < 2 ; i++) {
1279             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1280                 continue;
1281             }
1282
1283             width = generic_state->frame_width_in_pixel;
1284             height = generic_state->frame_height_in_pixel ;
1285             i965_CreateSurfaces(ctx,
1286                                 width,
1287                                 height,
1288                                 VA_RT_FORMAT_YUV420,
1289                                 1,
1290                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1292
1293             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294                 goto failed_allocation;
1295             }
1296
1297             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1299         }
1300         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1304     }
1305
1306     /* other   */
1307
1308     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1309     size = 4 * 1;
1310     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311                                                &avc_ctx->res_mad_data_buffer,
1312                                                ALIGN(size, 0x1000),
1313                                                "MAD data buffer");
1314     if (!allocate_flag)
1315         goto failed_allocation;
1316
1317     return VA_STATUS_SUCCESS;
1318
1319 failed_allocation:
1320     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1321 }
1322
1323 static void
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1325 {
1326     if (!vme_context)
1327         return;
1328
1329     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330     VADriverContextP ctx = avc_ctx->ctx;
1331     int i = 0;
1332
1333     /* free all the surface/buffer here*/
1334     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1360
1361     for (i = 0; i < 2 ; i++) {
1362         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1366         }
1367     }
1368
1369     /* free preenc resources */
1370     i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371     i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372     i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373     i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1374
1375     i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376     i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1377
1378     i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379     avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380     avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1381
1382     i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383     avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384     avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1385
1386     i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387     avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388     avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1389 }
1390
1391 static void
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393                                  struct intel_encoder_context *encoder_context,
1394                                  struct i965_gpe_context *gpe_context,
1395                                  int media_function,
1396                                  struct gpe_media_object_parameter *param)
1397 {
1398     struct i965_driver_data *i965 = i965_driver_data(ctx);
1399     struct i965_gpe_table *gpe = &i965->gpe_table;
1400     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1402
1403     struct intel_batchbuffer *batch = encoder_context->base.batch;
1404     struct encoder_status_buffer_internal *status_buffer;
1405     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1406
1407     if (!batch)
1408         return;
1409
1410     intel_batchbuffer_start_atomic(batch, 0x1000);
1411     intel_batchbuffer_emit_mi_flush(batch);
1412
1413     status_buffer = &(avc_ctx->status_buffer);
1414     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415     mi_store_data_imm.bo = status_buffer->bo;
1416     mi_store_data_imm.offset = status_buffer->media_index_offset;
1417     mi_store_data_imm.dw0 = media_function;
1418     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1419
1420     gpe->pipeline_setup(ctx, gpe_context, batch);
1421     gpe->media_object(ctx, gpe_context, batch, param);
1422     gpe->media_state_flush(ctx, gpe_context, batch);
1423
1424     gpe->pipeline_end(ctx, gpe_context, batch);
1425
1426     intel_batchbuffer_end_atomic(batch);
1427
1428     intel_batchbuffer_flush(batch);
1429 }
1430
1431 static void
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433                                         struct intel_encoder_context *encoder_context,
1434                                         struct i965_gpe_context *gpe_context,
1435                                         int media_function,
1436                                         struct gpe_media_object_walker_parameter *param)
1437 {
1438     struct i965_driver_data *i965 = i965_driver_data(ctx);
1439     struct i965_gpe_table *gpe = &i965->gpe_table;
1440     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1442
1443     struct intel_batchbuffer *batch = encoder_context->base.batch;
1444     struct encoder_status_buffer_internal *status_buffer;
1445     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1446
1447     if (!batch)
1448         return;
1449
1450     intel_batchbuffer_start_atomic(batch, 0x1000);
1451
1452     intel_batchbuffer_emit_mi_flush(batch);
1453
1454     status_buffer = &(avc_ctx->status_buffer);
1455     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456     mi_store_data_imm.bo = status_buffer->bo;
1457     mi_store_data_imm.offset = status_buffer->media_index_offset;
1458     mi_store_data_imm.dw0 = media_function;
1459     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1460
1461     gpe->pipeline_setup(ctx, gpe_context, batch);
1462     gpe->media_object_walker(ctx, gpe_context, batch, param);
1463     gpe->media_state_flush(ctx, gpe_context, batch);
1464
1465     gpe->pipeline_end(ctx, gpe_context, batch);
1466
1467     intel_batchbuffer_end_atomic(batch);
1468
1469     intel_batchbuffer_flush(batch);
1470 }
1471
1472 static void
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474                           struct i965_gpe_context *gpe_context,
1475                           struct encoder_kernel_parameter *kernel_param)
1476 {
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478
1479     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1480
1481     gpe_context->sampler.entry_size = 0;
1482     gpe_context->sampler.max_entries = 0;
1483
1484     if (kernel_param->sampler_size) {
1485         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486         gpe_context->sampler.max_entries = 1;
1487     }
1488
1489     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1491
1492     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1496
1497     if (i965->intel.eu_total > 0)
1498         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1499     else
1500         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1501
1502     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505                                               gpe_context->vfe_state.curbe_allocation_size -
1506                                               ((gpe_context->idrt.entry_size >> 5) *
1507                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509     gpe_context->vfe_state.gpgpu_mode = 0;
1510 }
1511
1512 static void
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514                              struct encoder_scoreboard_parameter *scoreboard_param)
1515 {
1516     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1519
1520     if (scoreboard_param->walkpat_flag) {
1521         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522         gpe_context->vfe_desc5.scoreboard0.type = 1;
1523
1524         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
1525         gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
1526
1527         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1528         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
1529
1530         gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
1531         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
1532
1533         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1534         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
1535     } else {
1536         // Scoreboard 0
1537         gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
1538         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
1539
1540         // Scoreboard 1
1541         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1542         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
1543
1544         // Scoreboard 2
1545         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
1546         gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
1547
1548         // Scoreboard 3
1549         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1550         gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
1551
1552         // Scoreboard 4
1553         gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
1554         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
1555
1556         // Scoreboard 5
1557         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
1558         gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
1559
1560         // Scoreboard 6
1561         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
1562         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1563
1564         // Scoreboard 7
1565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
1566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1567     }
1568 }
1569 /*
1570 VME pipeline related function
1571 */
1572
1573 /*
1574 scaling kernel related function
1575 */
1576 static void
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578                              struct encode_state *encode_state,
1579                              struct i965_gpe_context *gpe_context,
1580                              struct intel_encoder_context *encoder_context,
1581                              void *param)
1582 {
1583     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584     struct scaling_param *surface_param = (struct scaling_param *)param;
1585
1586     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1587
1588     if (!curbe_cmd)
1589         return;
1590
1591     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1592
1593     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1594     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1595
1596     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1598
1599
1600     curbe_cmd->dw5.flatness_threshold = 128;
1601     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1604
1605     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606         curbe_cmd->dw7.enable_mb_variance_output ||
1607         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1609     }
1610
1611     i965_gpe_context_unmap_curbe(gpe_context);
1612     return;
1613 }
1614
1615 static void
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617                               struct encode_state *encode_state,
1618                               struct i965_gpe_context *gpe_context,
1619                               struct intel_encoder_context *encoder_context,
1620                               void *param)
1621 {
1622     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623     struct scaling_param *surface_param = (struct scaling_param *)param;
1624
1625     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1626
1627     if (!curbe_cmd)
1628         return;
1629
1630     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1631
1632     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1633     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1634
1635     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1637
1638     if (surface_param->enable_mb_flatness_check)
1639         curbe_cmd->dw5.flatness_threshold = 128;
1640     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1644
1645     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646         curbe_cmd->dw6.enable_mb_variance_output ||
1647         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1649     }
1650
1651     i965_gpe_context_unmap_curbe(gpe_context);
1652     return;
1653 }
1654
1655 static void
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657                              struct encode_state *encode_state,
1658                              struct i965_gpe_context *gpe_context,
1659                              struct intel_encoder_context *encoder_context,
1660                              void *param)
1661 {
1662     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663     struct scaling_param *surface_param = (struct scaling_param *)param;
1664
1665     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1666
1667     if (!curbe_cmd)
1668         return;
1669
1670     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1671
1672     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1673     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1674
1675     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1677
1678     i965_gpe_context_unmap_curbe(gpe_context);
1679     return;
1680 }
1681
1682 static void
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684                               struct encode_state *encode_state,
1685                               struct i965_gpe_context *gpe_context,
1686                               struct intel_encoder_context *encoder_context,
1687                               void *param)
1688 {
1689     struct scaling_param *surface_param = (struct scaling_param *)param;
1690     struct i965_driver_data *i965 = i965_driver_data(ctx);
1691     unsigned int surface_format;
1692     unsigned int res_size;
1693
1694     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1698     else
1699         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1700
1701     i965_add_2d_gpe_surface(ctx, gpe_context,
1702                             surface_param->input_surface,
1703                             0, 1, surface_format,
1704                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1705
1706     i965_add_2d_gpe_surface(ctx, gpe_context,
1707                             surface_param->output_surface,
1708                             0, 1, surface_format,
1709                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1710
1711     /*add buffer mv_proc_stat, here need change*/
1712     if (IS_GEN8(i965->intel.device_info)) {
1713         if (surface_param->mbv_proc_stat_enabled) {
1714             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1715
1716             i965_add_buffer_gpe_surface(ctx,
1717                                         gpe_context,
1718                                         surface_param->pres_mbv_proc_stat_buffer,
1719                                         0,
1720                                         res_size / 4,
1721                                         0,
1722                                         GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1723         }
1724         if (surface_param->enable_mb_flatness_check) {
1725             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726                                            surface_param->pres_flatness_check_surface,
1727                                            1,
1728                                            I965_SURFACEFORMAT_R8_UNORM,
1729                                            GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1730         }
1731     } else {
1732         if (surface_param->mbv_proc_stat_enabled) {
1733             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1734
1735             i965_add_buffer_gpe_surface(ctx,
1736                                         gpe_context,
1737                                         surface_param->pres_mbv_proc_stat_buffer,
1738                                         0,
1739                                         res_size / 4,
1740                                         0,
1741                                         GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742         } else if (surface_param->enable_mb_flatness_check) {
1743             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744                                            surface_param->pres_flatness_check_surface,
1745                                            1,
1746                                            I965_SURFACEFORMAT_R8_UNORM,
1747                                            GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1748         }
1749     }
1750     return;
1751 }
1752
1753 static VAStatus
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755                         struct encode_state *encode_state,
1756                         struct intel_encoder_context *encoder_context,
1757                         int hme_type)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct i965_gpe_table *gpe = &i965->gpe_table;
1761     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1766
1767     struct i965_gpe_context *gpe_context;
1768     struct scaling_param surface_param;
1769     struct object_surface *obj_surface;
1770     struct gen9_surface_avc *avc_priv_surface;
1771     struct gpe_media_object_walker_parameter media_object_walker_param;
1772     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774     int media_function = 0;
1775     int kernel_idx = 0;
1776
1777     obj_surface = encode_state->reconstructed_object;
1778     avc_priv_surface = obj_surface->private_data;
1779
1780     memset(&surface_param, 0, sizeof(struct scaling_param));
1781     switch (hme_type) {
1782     case INTEL_ENC_HME_4x : {
1783         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1787
1788         surface_param.input_surface = encode_state->input_yuv_object ;
1789         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1791
1792         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793         surface_param.output_frame_width = generic_state->frame_width_4x ;
1794         surface_param.output_frame_height = generic_state->frame_height_4x ;
1795
1796         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1799
1800         surface_param.blk8x8_stat_enabled = 0 ;
1801         surface_param.use_4x_scaling  = 1 ;
1802         surface_param.use_16x_scaling = 0 ;
1803         surface_param.use_32x_scaling = 0 ;
1804         break;
1805     }
1806     case INTEL_ENC_HME_16x : {
1807         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1811
1812         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813         surface_param.input_frame_width = generic_state->frame_width_4x ;
1814         surface_param.input_frame_height = generic_state->frame_height_4x ;
1815
1816         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817         surface_param.output_frame_width = generic_state->frame_width_16x ;
1818         surface_param.output_frame_height = generic_state->frame_height_16x ;
1819
1820         surface_param.enable_mb_flatness_check = 0 ;
1821         surface_param.enable_mb_variance_output = 0 ;
1822         surface_param.enable_mb_pixel_average_output = 0 ;
1823
1824         surface_param.blk8x8_stat_enabled = 0 ;
1825         surface_param.use_4x_scaling  = 0 ;
1826         surface_param.use_16x_scaling = 1 ;
1827         surface_param.use_32x_scaling = 0 ;
1828
1829         break;
1830     }
1831     case INTEL_ENC_HME_32x : {
1832         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1836
1837         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838         surface_param.input_frame_width = generic_state->frame_width_16x ;
1839         surface_param.input_frame_height = generic_state->frame_height_16x ;
1840
1841         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842         surface_param.output_frame_width = generic_state->frame_width_32x ;
1843         surface_param.output_frame_height = generic_state->frame_height_32x ;
1844
1845         surface_param.enable_mb_flatness_check = 0 ;
1846         surface_param.enable_mb_variance_output = 0 ;
1847         surface_param.enable_mb_pixel_average_output = 0 ;
1848
1849         surface_param.blk8x8_stat_enabled = 0 ;
1850         surface_param.use_4x_scaling  = 0 ;
1851         surface_param.use_16x_scaling = 0 ;
1852         surface_param.use_32x_scaling = 1 ;
1853         break;
1854     }
1855     default :
1856         assert(0);
1857
1858     }
1859
1860     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1861
1862     gpe->context_init(ctx, gpe_context);
1863     gpe->reset_binding_table(ctx, gpe_context);
1864
1865     if (surface_param.use_32x_scaling) {
1866         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1867     } else {
1868         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1869     }
1870
1871     if (surface_param.use_32x_scaling) {
1872         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1874     } else {
1875         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1877     }
1878
1879     if (surface_param.use_4x_scaling) {
1880         if (avc_state->mb_status_supported) {
1881             surface_param.enable_mb_flatness_check = 0;
1882             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1884
1885         } else {
1886             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887             surface_param.mbv_proc_stat_enabled = 0 ;
1888             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1889         }
1890     }
1891
1892     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1893
1894     /* setup the interface data */
1895     gpe->setup_interface_data(ctx, gpe_context);
1896
1897     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898     if (surface_param.use_32x_scaling) {
1899         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1901     } else {
1902         /* the scaling is based on 8x8 blk level */
1903         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1905     }
1906     kernel_walker_param.no_dependency = 1;
1907
1908     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1909
1910     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1911                                             gpe_context,
1912                                             media_function,
1913                                             &media_object_walker_param);
1914
1915     return VA_STATUS_SUCCESS;
1916 }
1917
1918 /*
1919 frame/mb brc related function
1920 */
/*
 * Initialize a gen9 MFX_AVC_IMG_STATE command from the current sequence
 * and picture parameters.  Callers copy the command (with per-pass tweaks)
 * into the image-state buffers consumed by the PAK pipeline.  The
 * slice-related DW19/DW20 fields are intentionally left zero here.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header: length is in dwords, excluding the first two. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//0 == frame picture (field encoding unsupported here)
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* Picture coding controls, copied from the VA-API SPS/PPS bits. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) only applies with CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        /* NOTE: aq_rounding was already zeroed above; this else is redundant. */
        pstate->dw5.aq_rounding = 0;
    }

    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* Per-slice delta-QP limits, all zero (no per-slice QP adjustment). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* Frame bitrate window: min 0, max set to the field's maximum value. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
2011
/*
 * Initialize a gen8 MFX_AVC_IMG_STATE command from the current sequence
 * and picture parameters.  This mirrors gen9_avc_init_mfx_avc_img_state()
 * but uses the gen8 command layout (dw3 carries the MB conformance flags
 * instead of the BRC domain enable).  DW19/DW20 are left for slice setup.
 */
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* Command header: length is in dwords, excluding the first two. */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    pstate->dw3.image_structure = 0;//0 == frame picture (field encoding unsupported here)
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* Picture coding controls, copied from the VA-API SPS/PPS bits. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) only applies with CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        /* NOTE: aq_rounding was already zeroed above; this else is redundant. */
        pstate->dw5.aq_rounding = 0;
    }

    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* Per-slice delta-QP limits, all zero (no per-slice QP adjustment). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* Frame bitrate window: min 0, max set to the field's maximum value. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set paramters DW19/DW20 for slices */
}
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102                               struct encode_state *encode_state,
2103                               struct intel_encoder_context *encoder_context,
2104                               struct i965_gpe_resource *gpe_resource)
2105 {
2106     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2108     char *pdata;
2109     int i;
2110     unsigned int * data;
2111     struct gen9_mfx_avc_img_state cmd;
2112
2113     pdata = i965_map_gpe_resource(gpe_resource);
2114
2115     if (!pdata)
2116         return;
2117
2118     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119     for (i = 0; i < generic_state->num_pak_passes; i++) {
2120
2121         if (i == 0) {
2122             cmd.dw4.macroblock_stat_enable = 0;
2123             cmd.dw5.non_first_pass_flag = 0;
2124         } else {
2125             cmd.dw4.macroblock_stat_enable = 1;
2126             cmd.dw5.non_first_pass_flag = 1;
2127             cmd.dw5.intra_mb_ipcm_flag = 1;
2128
2129         }
2130         cmd.dw5.mb_rate_ctrl_flag = 0;
2131         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133         *data = MI_BATCH_BUFFER_END;
2134
2135         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2136     }
2137     i965_unmap_gpe_resource(gpe_resource);
2138     return;
2139 }
2140
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142                               struct encode_state *encode_state,
2143                               struct intel_encoder_context *encoder_context,
2144                               struct i965_gpe_resource *gpe_resource)
2145 {
2146     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2148     char *pdata;
2149     int i;
2150     unsigned int * data;
2151     struct gen8_mfx_avc_img_state cmd;
2152
2153     pdata = i965_map_gpe_resource(gpe_resource);
2154
2155     if (!pdata)
2156         return;
2157
2158     gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159     for (i = 0; i < generic_state->num_pak_passes; i++) {
2160
2161         if (i == 0) {
2162             cmd.dw4.macroblock_stat_enable = 0;
2163             cmd.dw5.non_first_pass_flag = 0;
2164         } else {
2165             cmd.dw4.macroblock_stat_enable = 1;
2166             cmd.dw5.non_first_pass_flag = 1;
2167             cmd.dw5.intra_mb_ipcm_flag = 1;
2168             cmd.dw3.inter_mb_conf_flag = 1;
2169             cmd.dw3.intra_mb_conf_flag = 1;
2170         }
2171         cmd.dw5.mb_rate_ctrl_flag = 0;
2172         memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173         data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174         *data = MI_BATCH_BUFFER_END;
2175
2176         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2177     }
2178     i965_unmap_gpe_resource(gpe_resource);
2179     return;
2180 }
2181
/*
 * Write a single MFX_AVC_IMG_STATE command (plus MI_BATCH_BUFFER_END) for
 * the CURRENT PAK pass only.  Used on the non-BRC path, unlike
 * gen9_avc_set_image_state() which pre-builds the state for every pass.
 */
void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      struct i965_gpe_resource *gpe_resource)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    char *pdata;

    unsigned int * data;
    struct gen9_mfx_avc_img_state cmd;

    pdata = i965_map_gpe_resource(gpe_resource);

    if (!pdata)
        return;

    gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);

    if (generic_state->curr_pak_pass == 0) {
        cmd.dw4.macroblock_stat_enable = 0;
        cmd.dw5.non_first_pass_flag = 0;

    } else {
        cmd.dw4.macroblock_stat_enable = 1;
        /* NOTE(review): gen9_avc_set_image_state() sets non_first_pass_flag
         * to 1 on repasses; here it stays 0 -- confirm this difference is
         * intended for the non-BRC path. */
        cmd.dw5.non_first_pass_flag = 0;
        cmd.dw5.intra_mb_ipcm_flag = 1;
    }

    cmd.dw5.mb_rate_ctrl_flag = 0;
    memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
    /* Terminate the image-state batch with MI_BATCH_BUFFER_END. */
    data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
    *data = MI_BATCH_BUFFER_END;

    i965_unmap_gpe_resource(gpe_resource);
    return;
}
2219
/*
 * Build the gen9.5 (KBL+) trellis-quantization lambda table for the
 * current frame type.  The table holds AVC_QP_MAX rows of two 32-bit
 * entries; each entry packs the intra lambda in its high 16 bits and the
 * inter lambda in its low 16 bits.  Sentinel entries in the static tables
 * are patched with rounding values derived from the encoder preset or the
 * user-configured rounding overrides.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* Table size in bytes: AVC_QP_MAX rows * 2 columns of 32-bit entries. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    /* Start from the static per-frame-type table; sizeof(unsigned char) is
     * 1, so each memcpy copies exactly `size` bytes. */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    /* Patch each entry's intra (high 16 bits) and inter (low 16 bits). */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;

            /* Intra: the 0xfffa sentinel becomes the default intra
             * trellis rounding. */
            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                /* The 0xffef sentinel picks the inter rounding from the
                 * preset or the user override, by frame/reference type. */
                if (inter == 0xffef) {
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): this replacement runs for EVERY inter value
                 * below GEN95_AVC_MAX_LAMBDA, not only the 0xffef sentinel,
                 * and may reuse a rounding_value left over from a previous
                 * entry (or 0) -- confirm this is intended. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
2297
/*
 * Fill the BRC constant-data GPU buffer (res_brc_const_data_buffer) for
 * Gen9+ platforms.  The buffer is a fixed hardware layout consumed by the
 * BRC kernels; it is written as a sequence of contiguous regions:
 *   1. QP adjustment / distortion threshold / max-frame threshold /
 *      distortion-QP adjustment tables
 *   2. skip threshold table (128 bytes, P/B only)
 *   3. QP list for the reference lists (32+32+32+160 bytes)
 *   4. MV cost and mode cost tables (1664 bytes)
 *   5. ref cost table (128 bytes)
 *   6. intra scaling factors (64 bytes)
 *   7. (KBL/GLK/Gen10 only) lambda data (512 bytes) + FTQ25 table (64 bytes)
 * The table variant is selected by the current frame type.
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Start from a zeroed buffer so regions not written below stay 0. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Map I/P/B frame type to the per-slice-type table index. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optional user-supplied non-FTQ skip thresholds override the table.
     * NOTE(review): each entry appears to be 2 bytes, with the computed
     * skip value written at odd offsets (byte 1 of each pair) — confirm
     * against the kernel's expected layout. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0, 32 bytes unused, 32 bytes L1, 160 bytes padding.
     * Unused ref-list slots are pre-filled with 0xff. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            /* Stop at the first invalid reference surface. */
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* L1 entries live 64 bytes into the region. */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//?
        }

        /* Rewind to the start of the region for the L0 entries. */
        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//?
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Legacy intra mode cost override: one byte per QP at offset 3 of each
     * 16-byte cost record.  NOTE(review): the (unsigned int) cast is
     * truncated to a byte by the unsigned char store — confirm the cost
     * values fit in 8 bits. */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* User-supplied FTQ skip thresholds: replicated into bytes 24-25 and
     * 27-31 of each per-QP 32-byte record (byte 26 is left untouched). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5-class platforms append lambda and FTQ25 tables. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2455
/*
 * Legacy variant of gen9_avc_init_brc_const_data() that uses the Gen7.5
 * constant tables (gen75_*) and a simpler fixed layout: no ref-list QP
 * data (two 128-unit regions are skipped), no scaling factors and no
 * Gen9.5 extras.
 *
 * NOTE(review): here `data` is `unsigned int *` (the gen9 variant uses
 * `unsigned char *`), so every `data += size` advances `size * 4` bytes
 * and the `*(data + ...)` stores write 32-bit slots.  Confirm this
 * matches the layout the legacy BRC kernel expects.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Start from a zeroed buffer so skipped regions stay 0. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Map I/P/B frame type to the per-slice-type table index. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Optional user-supplied non-FTQ skip thresholds override the table. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Two regions reserved but left zeroed in the legacy layout. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Legacy intra mode cost override, one slot per QP at offset 3 of
     * each 16-unit cost record. */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* User-supplied FTQ skip thresholds replicated into slots 24-25 and
     * 27-31 of each per-QP 32-unit record (slot 26 untouched). */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2550 static void
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2555                                   void * param)
2556 {
2557     gen9_avc_brc_init_reset_curbe_data *cmd;
2558     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561     double input_bits_per_frame = 0;
2562     double bps_ratio = 0;
2563     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564     struct avc_param common_param;
2565
2566     cmd = i965_gpe_context_map_curbe(gpe_context);
2567
2568     if (!cmd)
2569         return;
2570
2571     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2572
2573     memset(&common_param, 0, sizeof(common_param));
2574     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578     common_param.frames_per_100s = generic_state->frames_per_100s;
2579     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580     common_param.target_bit_rate = generic_state->target_bit_rate;
2581
2582     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2585     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2587     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591     cmd->dw12.no_slices = avc_state->slice_num;
2592
2593     //VUI
2594     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2596             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2597
2598         }
2599
2600     }
2601     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2602     cmd->dw7.frame_rate_d = 100;
2603     cmd->dw8.brc_flag = 0;
2604     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2605
2606
2607     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2608         //CBR
2609         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2610         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2611
2612     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2613         //VBR
2614         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2615             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2616         }
2617         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2618
2619     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2620         //AVBR
2621         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2622         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2623
2624     }
2625     //igonre icq/vcm/qvbr
2626
2627     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2628     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2629
2630     //frame bits
2631     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2632
2633     if (cmd->dw2.buf_size_in_bits == 0) {
2634         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2635     }
2636
2637     if (cmd->dw1.init_buf_full_in_bits == 0) {
2638         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2639     }
2640     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2641         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2642     }
2643     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2644         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2645     }
2646
2647     //AVBR
2648     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2649         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2650         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2651
2652     }
2653
2654     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2655     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2656
2657
2658     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2659     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2660     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2661     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2662     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2663     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2664     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2665     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2666     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2667     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2668     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2669     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2670     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2671     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2672     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2673     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2674     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2675     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2676     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2677     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2678     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2679     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2680     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2681     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2682
2683     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2684
2685     i965_gpe_context_unmap_curbe(gpe_context);
2686
2687     return;
2688 }
2689
2690 static void
2691 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2692                                      struct encode_state *encode_state,
2693                                      struct i965_gpe_context *gpe_context,
2694                                      struct intel_encoder_context *encoder_context,
2695                                      void * param_mbenc)
2696 {
2697     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2698     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2699
2700     i965_add_buffer_gpe_surface(ctx,
2701                                 gpe_context,
2702                                 &avc_ctx->res_brc_history_buffer,
2703                                 0,
2704                                 avc_ctx->res_brc_history_buffer.size,
2705                                 0,
2706                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2707
2708     i965_add_buffer_2d_gpe_surface(ctx,
2709                                    gpe_context,
2710                                    &avc_ctx->res_brc_dist_data_surface,
2711                                    1,
2712                                    I965_SURFACEFORMAT_R8_UNORM,
2713                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2714
2715     return;
2716 }
2717
2718 static VAStatus
2719 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2720                                struct encode_state *encode_state,
2721                                struct intel_encoder_context *encoder_context)
2722 {
2723     struct i965_driver_data *i965 = i965_driver_data(ctx);
2724     struct i965_gpe_table *gpe = &i965->gpe_table;
2725     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2726     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2727     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2728     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2729
2730     struct i965_gpe_context *gpe_context;
2731     struct gpe_media_object_parameter media_object_param;
2732     struct gpe_media_object_inline_data media_object_inline_data;
2733     int media_function = 0;
2734     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2735
2736     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2737
2738     if (generic_state->brc_inited)
2739         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2740
2741     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2742
2743     gpe->context_init(ctx, gpe_context);
2744     gpe->reset_binding_table(ctx, gpe_context);
2745
2746     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2747
2748     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2749
2750     gpe->setup_interface_data(ctx, gpe_context);
2751
2752     memset(&media_object_param, 0, sizeof(media_object_param));
2753     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2754     media_object_param.pinline_data = &media_object_inline_data;
2755     media_object_param.inline_size = sizeof(media_object_inline_data);
2756
2757     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2758                                      gpe_context,
2759                                      media_function,
2760                                      &media_object_param);
2761
2762     return VA_STATUS_SUCCESS;
2763 }
2764
2765 static void
2766 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2767                                     struct encode_state *encode_state,
2768                                     struct i965_gpe_context *gpe_context,
2769                                     struct intel_encoder_context *encoder_context,
2770                                     void * param)
2771 {
2772     gen9_avc_frame_brc_update_curbe_data *cmd;
2773     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2774     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2775     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2776     struct object_surface *obj_surface;
2777     struct gen9_surface_avc *avc_priv_surface;
2778     struct avc_param common_param;
2779     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2780
2781     obj_surface = encode_state->reconstructed_object;
2782
2783     if (!obj_surface || !obj_surface->private_data)
2784         return;
2785     avc_priv_surface = obj_surface->private_data;
2786
2787     cmd = i965_gpe_context_map_curbe(gpe_context);
2788
2789     if (!cmd)
2790         return;
2791
2792     memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
2793
2794     cmd->dw5.target_size_flag = 0 ;
2795     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2796         /*overflow*/
2797         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2798         cmd->dw5.target_size_flag = 1 ;
2799     }
2800
2801     if (generic_state->skip_frame_enbale) {
2802         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2803         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2804
2805         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2806
2807     }
2808     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2809     cmd->dw1.frame_number = generic_state->seq_frame_number ;
2810     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2811     cmd->dw5.cur_frame_type = generic_state->frame_type ;
2812     cmd->dw5.brc_flag = 0 ;
2813     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2814
2815     if (avc_state->multi_pre_enable) {
2816         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2817         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2818     }
2819
2820     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
2821     if (avc_state->min_max_qp_enable) {
2822         switch (generic_state->frame_type) {
2823         case SLICE_TYPE_I:
2824             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2825             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2826             break;
2827         case SLICE_TYPE_P:
2828             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2829             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2830             break;
2831         case SLICE_TYPE_B:
2832             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2833             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2834             break;
2835         }
2836     } else {
2837         cmd->dw6.minimum_qp = 0 ;
2838         cmd->dw6.maximum_qp = 0 ;
2839     }
2840     cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2841     cmd->dw6.enable_sliding_window = 0 ;
2842
2843     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
2844
2845     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2846         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
2847         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
2848         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
2849         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
2850         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2851         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2852         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2853         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2854         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2855         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2856
2857     }
2858     cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2859
2860     memset(&common_param, 0, sizeof(common_param));
2861     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2862     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2863     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2864     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2865     common_param.frames_per_100s = generic_state->frames_per_100s;
2866     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2867     common_param.target_bit_rate = generic_state->target_bit_rate;
2868
2869     cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2870     i965_gpe_context_unmap_curbe(gpe_context);
2871
2872     return;
2873 }
2874
2875 static void
2876 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2877                                        struct encode_state *encode_state,
2878                                        struct i965_gpe_context *gpe_context,
2879                                        struct intel_encoder_context *encoder_context,
2880                                        void * param_brc)
2881 {
2882     struct i965_driver_data *i965 = i965_driver_data(ctx);
2883     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2884     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2885     struct brc_param * param = (struct brc_param *)param_brc ;
2886     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2887     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2888     unsigned char is_g95 = 0;
2889
2890     if (IS_SKL(i965->intel.device_info) ||
2891         IS_BXT(i965->intel.device_info) ||
2892         IS_GEN8(i965->intel.device_info))
2893         is_g95 = 0;
2894     else if (IS_KBL(i965->intel.device_info) ||
2895              IS_GEN10(i965->intel.device_info) ||
2896              IS_GLK(i965->intel.device_info))
2897         is_g95 = 1;
2898
2899     /* brc history buffer*/
2900     i965_add_buffer_gpe_surface(ctx,
2901                                 gpe_context,
2902                                 &avc_ctx->res_brc_history_buffer,
2903                                 0,
2904                                 avc_ctx->res_brc_history_buffer.size,
2905                                 0,
2906                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2907
2908     /* previous pak buffer*/
2909     i965_add_buffer_gpe_surface(ctx,
2910                                 gpe_context,
2911                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2912                                 0,
2913                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2914                                 0,
2915                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2916
2917     /* image state command buffer read only*/
2918     i965_add_buffer_gpe_surface(ctx,
2919                                 gpe_context,
2920                                 &avc_ctx->res_brc_image_state_read_buffer,
2921                                 0,
2922                                 avc_ctx->res_brc_image_state_read_buffer.size,
2923                                 0,
2924                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2925
2926     /* image state command buffer write only*/
2927     i965_add_buffer_gpe_surface(ctx,
2928                                 gpe_context,
2929                                 &avc_ctx->res_brc_image_state_write_buffer,
2930                                 0,
2931                                 avc_ctx->res_brc_image_state_write_buffer.size,
2932                                 0,
2933                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2934
2935     if (avc_state->mbenc_brc_buffer_size > 0) {
2936         i965_add_buffer_gpe_surface(ctx,
2937                                     gpe_context,
2938                                     &(avc_ctx->res_mbenc_brc_buffer),
2939                                     0,
2940                                     avc_ctx->res_mbenc_brc_buffer.size,
2941                                     0,
2942                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2943     } else {
2944         /*  Mbenc curbe input buffer */
2945         gen9_add_dri_buffer_gpe_surface(ctx,
2946                                         gpe_context,
2947                                         gpe_context_mbenc->dynamic_state.bo,
2948                                         0,
2949                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2950                                         gpe_context_mbenc->curbe.offset,
2951                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2952         /* Mbenc curbe output buffer */
2953         gen9_add_dri_buffer_gpe_surface(ctx,
2954                                         gpe_context,
2955                                         gpe_context_mbenc->dynamic_state.bo,
2956                                         0,
2957                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2958                                         gpe_context_mbenc->curbe.offset,
2959                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2960     }
2961
2962     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2963     i965_add_buffer_2d_gpe_surface(ctx,
2964                                    gpe_context,
2965                                    &avc_ctx->res_brc_dist_data_surface,
2966                                    1,
2967                                    I965_SURFACEFORMAT_R8_UNORM,
2968                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2969
2970     /* BRC const data 2D surface buffer */
2971     i965_add_buffer_2d_gpe_surface(ctx,
2972                                    gpe_context,
2973                                    &avc_ctx->res_brc_const_data_buffer,
2974                                    1,
2975                                    I965_SURFACEFORMAT_R8_UNORM,
2976                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2977
2978     /* MB statistical data surface*/
2979     if (!IS_GEN8(i965->intel.device_info)) {
2980         i965_add_buffer_gpe_surface(ctx,
2981                                     gpe_context,
2982                                     &avc_ctx->res_mb_status_buffer,
2983                                     0,
2984                                     avc_ctx->res_mb_status_buffer.size,
2985                                     0,
2986                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2987     } else {
2988         i965_add_buffer_2d_gpe_surface(ctx,
2989                                        gpe_context,
2990                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2991                                        1,
2992                                        I965_SURFACEFORMAT_R8_UNORM,
2993                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2994     }
2995     return;
2996 }
2997
/*
 * Run the per-frame BRC (bit-rate control) update kernel.
 *
 * The frame-update kernel consumes the previous frame's PAK statistics and
 * history, and rewrites the MFX image state for the current frame.  Because
 * it can also patch the MBEnc curbe in place, the matching MBEnc gpe context
 * is selected and its curbe programmed here first, then handed to the BRC
 * kernel through curbe_brc_param.gpe_context_mbenc.
 *
 * Always returns VA_STATUS_SUCCESS; the kernel itself is submitted to the
 * GPU as a single MEDIA_OBJECT and completes asynchronously.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    /* NOTE(review): the MBEnc curbe is built with brc_enabled forced to 0
     * here — presumably the BRC kernel patches the BRC-related fields
     * afterwards; confirm against pfn_set_curbe_mbenc. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* Dirty-ROI is deliberately compiled out via the trailing "&& (0)". */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Select the MBEnc kernel flavor; the *_I base index is offset below
     * by +1 for P and +2 for B frames. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    /* Hand the MBEnc context to the BRC kernel, then repoint gpe_context
     * at the BRC frame-update context for the remainder of the function. */
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct*/
    if (IS_GEN8(i965->intel.device_info)) {
        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    } else {
        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    }
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* The frame-update kernel runs as a single MEDIA_OBJECT thread; the
     * inline data payload is zeroed (unused). */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
3122
3123 static void
3124 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3125                                  struct encode_state *encode_state,
3126                                  struct i965_gpe_context *gpe_context,
3127                                  struct intel_encoder_context *encoder_context,
3128                                  void * param)
3129 {
3130     gen9_avc_mb_brc_curbe_data *cmd;
3131     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3132     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3133
3134     cmd = i965_gpe_context_map_curbe(gpe_context);
3135
3136     if (!cmd)
3137         return;
3138
3139     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3140
3141     cmd->dw0.cur_frame_type = generic_state->frame_type;
3142     if (generic_state->brc_roi_enable) {
3143         cmd->dw0.enable_roi = 1;
3144     } else {
3145         cmd->dw0.enable_roi = 0;
3146     }
3147
3148     i965_gpe_context_unmap_curbe(gpe_context);
3149
3150     return;
3151 }
3152
3153 static void
3154 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3155                                     struct encode_state *encode_state,
3156                                     struct i965_gpe_context *gpe_context,
3157                                     struct intel_encoder_context *encoder_context,
3158                                     void * param_mbenc)
3159 {
3160     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3161     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3162     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3163
3164     /* brc history buffer*/
3165     i965_add_buffer_gpe_surface(ctx,
3166                                 gpe_context,
3167                                 &avc_ctx->res_brc_history_buffer,
3168                                 0,
3169                                 avc_ctx->res_brc_history_buffer.size,
3170                                 0,
3171                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3172
3173     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
3174     if (generic_state->mb_brc_enabled) {
3175         i965_add_buffer_2d_gpe_surface(ctx,
3176                                        gpe_context,
3177                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
3178                                        1,
3179                                        I965_SURFACEFORMAT_R8_UNORM,
3180                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3181
3182     }
3183
3184     /* BRC roi feature*/
3185     if (generic_state->brc_roi_enable) {
3186         i965_add_buffer_gpe_surface(ctx,
3187                                     gpe_context,
3188                                     &avc_ctx->res_mbbrc_roi_surface,
3189                                     0,
3190                                     avc_ctx->res_mbbrc_roi_surface.size,
3191                                     0,
3192                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3193
3194     }
3195
3196     /* MB statistical data surface*/
3197     i965_add_buffer_gpe_surface(ctx,
3198                                 gpe_context,
3199                                 &avc_ctx->res_mb_status_buffer,
3200                                 0,
3201                                 avc_ctx->res_mb_status_buffer.size,
3202                                 0,
3203                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
3204
3205     return;
3206 }
3207
3208 static VAStatus
3209 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3210                               struct encode_state *encode_state,
3211                               struct intel_encoder_context *encoder_context)
3212
3213 {
3214     struct i965_driver_data *i965 = i965_driver_data(ctx);
3215     struct i965_gpe_table *gpe = &i965->gpe_table;
3216     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3217     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3218     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3219     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3220
3221     struct i965_gpe_context *gpe_context;
3222     struct gpe_media_object_walker_parameter media_object_walker_param;
3223     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3224     int media_function = 0;
3225     int kernel_idx = 0;
3226
3227     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3228     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3229     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3230
3231     gpe->context_init(ctx, gpe_context);
3232     gpe->reset_binding_table(ctx, gpe_context);
3233
3234     /* set curbe brc mb update*/
3235     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3236
3237
3238     /* set surface brc mb update*/
3239     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3240
3241
3242     gpe->setup_interface_data(ctx, gpe_context);
3243
3244     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3245     /* the scaling is based on 8x8 blk level */
3246     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3247     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3248     kernel_walker_param.no_dependency = 1;
3249
3250     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3251
3252     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3253                                             gpe_context,
3254                                             media_function,
3255                                             &media_object_walker_param);
3256
3257     return VA_STATUS_SUCCESS;
3258 }
3259
3260 /*
3261 mbenc kernel related function,it include intra dist kernel
3262 */
3263 static int
3264 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3265 {
3266     int biweight = 32;      // default value
3267
3268     /* based on kernel HLD*/
3269     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3270         biweight = 32;
3271     } else {
3272         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3273
3274         if (biweight != 16 && biweight != 21 &&
3275             biweight != 32 && biweight != 43 && biweight != 48) {
3276             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
3277         }
3278     }
3279
3280     return biweight;
3281 }
3282
3283 static void
3284 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3285                                struct encode_state *encode_state,
3286                                struct intel_encoder_context *encoder_context)
3287 {
3288     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3289     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3290     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3291     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3292
3293     int max_num_references;
3294     VAPictureH264 *curr_pic;
3295     VAPictureH264 *ref_pic_l0;
3296     VAPictureH264 *ref_pic_l1;
3297     int i = 0;
3298     int tb = 0;
3299     int td = 0;
3300     int tx = 0;
3301     int tmp = 0;
3302     int poc0 = 0;
3303     int poc1 = 0;
3304
3305     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3306
3307     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3308     curr_pic = &pic_param->CurrPic;
3309     for (i = 0; i < max_num_references; i++) {
3310         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3311
3312         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3313             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3314             break;
3315         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3316         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3317             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3318             break;
3319
3320         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3321         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322         CLIP(poc0, -128, 127);
3323         CLIP(poc1, -128, 127);
3324         tb = poc0;
3325         td = poc1;
3326
3327         if (td == 0) {
3328             td = 1;
3329         }
3330         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3331         tx = (16384 + tmp) / td ;
3332         tmp = (tb * tx + 32) >> 6;
3333         CLIP(tmp, -1024, 1023);
3334         avc_state->dist_scale_factor_list0[i] = tmp;
3335     }
3336     return;
3337 }
3338
3339 static unsigned int
3340 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3341                               VAEncSliceParameterBufferH264 *slice_param,
3342                               int list,
3343                               int ref_frame_idx)
3344 {
3345     struct i965_driver_data *i965 = i965_driver_data(ctx);
3346     struct object_surface *obj_surface;
3347     struct gen9_surface_avc *avc_priv_surface;
3348     VASurfaceID surface_id;
3349
3350     assert(slice_param);
3351     assert(list < 2);
3352
3353     if (list == 0) {
3354         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3355             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3356         else
3357             return 0;
3358     } else {
3359         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3360             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3361         else
3362             return 0;
3363     }
3364     obj_surface = SURFACE(surface_id);
3365     if (obj_surface && obj_surface->private_data) {
3366         avc_priv_surface = obj_surface->private_data;
3367         return avc_priv_surface->qp_value;
3368     } else {
3369         return 0;
3370     }
3371 }
3372
/*
 * Fill the MB-BRC constant-data buffer with per-QP tuning rows.
 *
 * The buffer holds one 16-dword row per QP value (size = 16 * 52 dwords;
 * AVC_QP_MAX rows are then patched in the loops below).  A base table
 * selected by slice type is copied in first, then individual dwords of
 * each row are overwritten depending on the enabled features:
 *   dword 0      - old intra mode cost (I slices only)
 *   dword 3      - P-skip MV cost bias (P slices only)
 *   dwords 6/7   - FTQ skip threshold, replicated across bytes
 *   dword 9      - non-FTQ skip value
 *   dword 10     - intra scaling factor (adaptive or fixed)
 *   dwords 11/12 - trellis lambda values
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Base table row set is selected by slice type (I/P/B). */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    /* Keep the row base; 'data' advances 16 dwords per QP below. */
    data_tmp = data;

    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 0: legacy intra mode cost, when enabled. */
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 3: skip-biased MV cost, P slices only. */
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: non-FTQ skip value, either from the caller-provided
             * LUT or from the built-in per-slice-type tables. */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra scaling factor. */
            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass over the same rows for slice-type-independent fields. */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            /* NOTE(review): dword 6 replicates the threshold into bytes
             * 0/2/3 (no << 8 term) while dword 7 fills all four bytes —
             * presumably intentional per the kernel layout; confirm. */
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        /* dwords 11/12: lambda pair for kernel trellis quantization. */
        if (avc_state->kernel_trellis_enable) {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3462
3463 static void
3464 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3465                          struct encode_state *encode_state,
3466                          struct i965_gpe_context *gpe_context,
3467                          struct intel_encoder_context *encoder_context,
3468                          void * param)
3469 {
3470     struct i965_driver_data *i965 = i965_driver_data(ctx);
3471     union {
3472         gen9_avc_mbenc_curbe_data *g9;
3473         gen95_avc_mbenc_curbe_data *g95;
3474     } cmd;
3475     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3476     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3477     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3478
3479     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3480     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3481     VASurfaceID surface_id;
3482     struct object_surface *obj_surface;
3483
3484     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3485     unsigned char qp = 0;
3486     unsigned char me_method = 0;
3487     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3488     unsigned int table_idx = 0;
3489     unsigned char is_g9 = 0;
3490     unsigned char is_g95 = 0;
3491     unsigned int curbe_size = 0;
3492
3493     unsigned int preset = generic_state->preset;
3494     if (IS_SKL(i965->intel.device_info) ||
3495         IS_BXT(i965->intel.device_info)) {
3496         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3497         if (!cmd.g9)
3498             return;
3499         is_g9 = 1;
3500         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3501         memset(cmd.g9, 0, curbe_size);
3502
3503         if (mbenc_i_frame_dist_in_use) {
3504             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3505
3506         } else {
3507             switch (generic_state->frame_type) {
3508             case SLICE_TYPE_I:
3509                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3510                 break;
3511             case SLICE_TYPE_P:
3512                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3513                 break;
3514             case SLICE_TYPE_B:
3515                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3516                 break;
3517             default:
3518                 assert(0);
3519             }
3520
3521         }
3522     } else if (IS_KBL(i965->intel.device_info) ||
3523                IS_GEN10(i965->intel.device_info) ||
3524                IS_GLK(i965->intel.device_info)) {
3525         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3526         if (!cmd.g95)
3527             return;
3528         is_g95 = 1;
3529         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3530         memset(cmd.g9, 0, curbe_size);
3531
3532         if (mbenc_i_frame_dist_in_use) {
3533             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3534
3535         } else {
3536             switch (generic_state->frame_type) {
3537             case SLICE_TYPE_I:
3538                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3539                 break;
3540             case SLICE_TYPE_P:
3541                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3542                 break;
3543             case SLICE_TYPE_B:
3544                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3545                 break;
3546             default:
3547                 assert(0);
3548             }
3549
3550         }
3551     } else {
3552         /* Never get here, just silence a gcc warning */
3553         assert(0);
3554
3555         return;
3556     }
3557
3558     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3559     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3560
3561     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3562     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3564     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565
3566     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3567     cmd.g9->dw38.max_len_sp = 0;
3568
3569     if (is_g95)
3570         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3571
3572     cmd.g9->dw3.src_access = 0;
3573     cmd.g9->dw3.ref_access = 0;
3574
3575     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3576         //disable ftq_override by now.
3577         if (avc_state->ftq_override) {
3578             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3579
3580         } else {
3581             // both gen9 and gen95 come here by now
3582             if (generic_state->frame_type == SLICE_TYPE_P) {
3583                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3584
3585             } else {
3586                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3587             }
3588         }
3589     } else {
3590         cmd.g9->dw3.ftq_enable = 0;
3591     }
3592
3593     if (avc_state->disable_sub_mb_partion)
3594         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3595
3596     if (mbenc_i_frame_dist_in_use) {
3597         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3598         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3599         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3600         cmd.g9->dw6.batch_buffer_end = 0;
3601         cmd.g9->dw31.intra_compute_type = 1;
3602
3603     } else {
3604         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3605         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3606         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3607
3608         {
3609             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3610             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3611             } else if (avc_state->skip_bias_adjustment_enable) {
3612                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3613                 // No need to check for P picture as the flag is only enabled for P picture */
3614                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3615
3616             }
3617         }
3618
3619         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3620         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3621     }
3622     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3623     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3624     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3625     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3626     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3627     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3628     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3629     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3630
3631
3632     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3633     cmd.g9->dw7.src_field_polarity = 0;//field related
3634
3635     /*ftq_skip_threshold_lut set,dw14 /15*/
3636
3637     /*r5 disable NonFTQSkipThresholdLUT*/
3638     if (generic_state->frame_type == SLICE_TYPE_P) {
3639         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3640
3641     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3642         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3643
3644     }
3645
3646     cmd.g9->dw13.qp_prime_y = qp;
3647     cmd.g9->dw13.qp_prime_cb = qp;
3648     cmd.g9->dw13.qp_prime_cr = qp;
3649     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3650
3651     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3652         switch (gen9_avc_multi_pred[preset]) {
3653         case 0:
3654             cmd.g9->dw32.mult_pred_l0_disable = 128;
3655             cmd.g9->dw32.mult_pred_l1_disable = 128;
3656             break;
3657         case 1:
3658             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3659             cmd.g9->dw32.mult_pred_l1_disable = 128;
3660             break;
3661         case 2:
3662             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3663             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664             break;
3665         case 3:
3666             cmd.g9->dw32.mult_pred_l0_disable = 1;
3667             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3668             break;
3669
3670         }
3671
3672     } else {
3673         cmd.g9->dw32.mult_pred_l0_disable = 128;
3674         cmd.g9->dw32.mult_pred_l1_disable = 128;
3675     }
3676
3677     /*field setting for dw33 34, ignored*/
3678
3679     if (avc_state->adaptive_transform_decision_enable) {
3680         if (generic_state->frame_type != SLICE_TYPE_I) {
3681             if (is_g9) {
3682                 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3683                 cmd.g9->dw58.mb_texture_threshold = 1024;
3684                 cmd.g9->dw58.tx_decision_threshold = 128;
3685             } else if (is_g95) {
3686                 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3687                 cmd.g95->dw60.mb_texture_threshold = 1024;
3688                 cmd.g95->dw60.tx_decision_threshold = 128;
3689             }
3690         }
3691     }
3692
3693
3694     if (generic_state->frame_type == SLICE_TYPE_B) {
3695         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3696         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3697         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3698     }
3699
3700     cmd.g9->dw34.b_original_bff = 0; //frame only
3701     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3702     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3703     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3704     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3705     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3706     if (is_g95) {
3707         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3708         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3709     }
3710
3711     if (is_g9) {
3712         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3713
3714         if (cmd.g9->dw34.force_non_skip_check) {
3715             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3716         }
3717     }
3718
3719
3720     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3721     cmd.g9->dw38.ref_threshold = 400;
3722     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3723
3724     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3725        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3726        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3727     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3728
3729     if (mbenc_i_frame_dist_in_use) {
3730         cmd.g9->dw13.qp_prime_y = 0;
3731         cmd.g9->dw13.qp_prime_cb = 0;
3732         cmd.g9->dw13.qp_prime_cr = 0;
3733         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3734         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3735         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3736
3737     }
3738     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3739         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3740         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3741         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3742         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3743         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3744         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3745         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3746         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3747         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3748         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3749     }
3750
3751     table_idx = slice_type_kernel[generic_state->frame_type];
3752     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3753
3754     if (generic_state->frame_type == SLICE_TYPE_I) {
3755         cmd.g9->dw0.skip_mode_enable = 0;
3756         cmd.g9->dw37.skip_mode_enable = 0;
3757         cmd.g9->dw36.hme_combine_overlap = 0;
3758         cmd.g9->dw47.intra_cost_sf = 16;
3759         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3760         if (is_g9)
3761             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3762
3763     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3764         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3765         cmd.g9->dw3.bme_disable_fbr = 1;
3766         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3767         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3768         cmd.g9->dw7.non_skip_zmv_added = 1;
3769         cmd.g9->dw7.non_skip_mode_added = 1;
3770         cmd.g9->dw7.skip_center_mask = 1;
3771         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3772         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3773         cmd.g9->dw36.hme_combine_overlap = 1;
3774         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3775         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3776         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3777         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3778         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3779         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3780             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3781
3782     } else {
3783         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3784         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3785         cmd.g9->dw3.search_ctrl = 7;
3786         cmd.g9->dw3.skip_type = 1;
3787         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3788         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3789         cmd.g9->dw7.skip_center_mask = 0xff;
3790         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3791         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3792         cmd.g9->dw36.hme_combine_overlap = 1;
3793         surface_id = slice_param->RefPicList1[0].picture_id;
3794         obj_surface = SURFACE(surface_id);
3795         if (!obj_surface) {
3796             WARN_ONCE("Invalid backward reference frame\n");
3797             return;
3798         }
3799         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3800
3801         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3802         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3803         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3804         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3805         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3806         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3807         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3808         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3809         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3810         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3811         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3812         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3813
3814         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3815         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3816             cmd.g9->dw7.non_skip_zmv_added = 1;
3817             cmd.g9->dw7.non_skip_mode_added = 1;
3818         }
3819
3820         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3821         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3822             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3823
3824     }
3825
3826     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3827
3828     if (avc_state->rolling_intra_refresh_enable) {
3829         /*by now disable it*/
3830         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3831         cmd.g9->dw32.mult_pred_l0_disable = 128;
3832         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3833          across one P frame to another P frame, as needed by the RollingI algo */
3834         if (is_g9) {
3835             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3836             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3837             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3838         }
3839
3840         if (is_g95) {
3841             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3842                 cmd.g95->dw4.enable_intra_refresh = 0;
3843                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3844                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3845                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3846             } else {
3847                 cmd.g95->dw4.enable_intra_refresh = 1;
3848                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3849                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3850                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3851                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3852                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3853             }
3854         }
3855
3856     } else {
3857         cmd.g9->dw34.widi_intra_refresh_en = 0;
3858     }
3859
3860     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3861     if (is_g9)
3862         cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3863     else if (is_g95)
3864         cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3865
3866     /*roi set disable by now. 49-56*/
3867     if (curbe_param->roi_enabled) {
3868         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3869         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3870         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3871         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3872
3873         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3874         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3875         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3876         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3877
3878         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3879         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3880         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3881         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3882
3883         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3884         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3885         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3886         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3887
3888         if (!generic_state->brc_enabled) {
3889             char tmp = 0;
3890             tmp = generic_state->roi[0].value;
3891             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3892             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3893             tmp = generic_state->roi[1].value;
3894             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3895             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3896             tmp = generic_state->roi[2].value;
3897             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3898             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3899             tmp = generic_state->roi[3].value;
3900             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3901             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3902         } else {
3903             cmd.g9->dw34.roi_enable_flag = 0;
3904         }
3905     }
3906
3907     if (is_g95) {
3908         if (avc_state->tq_enable) {
3909             if (generic_state->frame_type == SLICE_TYPE_I) {
3910                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3911                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3912
3913             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3914                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3915                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3916
3917             } else {
3918                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3919                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3920             }
3921
3922             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3923                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3924
3925             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3926                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3927
3928             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3929                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3930
3931             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3932                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3933         }
3934     }
3935
3936     if (is_g95) {
3937         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3938         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3939         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3940         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3941         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3942         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3943         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3944         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3945         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3946         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3947         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3948         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3949         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3950         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3951         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3952         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3953         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3954         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3955         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3956         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3957         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3958         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3959     }
3960
3961     if (is_g9) {
3962         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3963         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3964         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3965         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3966         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3967         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3968         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3969         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3970         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3971         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3972         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3973         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3974         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3975         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3976         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3977         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3978         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3979         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3980         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3981         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3982         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3983         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3984     }
3985
3986     i965_gpe_context_unmap_curbe(gpe_context);
3987
3988     return;
3989 }
3990
3991 static void
3992 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3993                              struct encode_state *encode_state,
3994                              struct i965_gpe_context *gpe_context,
3995                              struct intel_encoder_context *encoder_context,
3996                              void * param)
3997 {
3998     struct i965_driver_data *i965 = i965_driver_data(ctx);
3999     gen9_avc_fei_mbenc_curbe_data *cmd;
4000     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4001     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4002     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4003     VASurfaceID surface_id;
4004     struct object_surface *obj_surface;
4005     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4006     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
4007     VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4008
4009     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4010     unsigned char qp = 0;
4011     unsigned char me_method = 0;
4012     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4013     unsigned int table_idx = 0;
4014     int ref_width, ref_height, len_sp;
4015     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4016     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4017     unsigned int preset = generic_state->preset;
4018
4019     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4020
4021     assert(gpe_context != NULL);
4022     cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4023     memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
4024
4025     if (mbenc_i_frame_dist_in_use) {
4026         memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4027
4028     } else {
4029         switch (generic_state->frame_type) {
4030         case SLICE_TYPE_I:
4031             memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4032             break;
4033         case SLICE_TYPE_P:
4034             memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4035             break;
4036         case SLICE_TYPE_B:
4037             memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4038             break;
4039         default:
4040             assert(0);
4041         }
4042
4043     }
4044     /* 4 means full search, 6 means diamand search */
4045     me_method  = (fei_param->search_window == 5) ||
4046                  (fei_param->search_window == 8) ? 4 : 6;
4047
4048     ref_width    = fei_param->ref_width;
4049     ref_height   = fei_param->ref_height;
4050     len_sp       = fei_param->len_sp;
4051     /* If there is a serch_window, discard user provided ref_width, ref_height
4052      * and search_path length */
4053     switch (fei_param->search_window) {
4054     case 0:
4055         /*  not use predefined search window, there should be a search_path input */
4056         if ((fei_param->search_path != 0) &&
4057             (fei_param->search_path != 1) &&
4058             (fei_param->search_path != 2)) {
4059             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
4060             assert(0);
4061         }
4062         /* 4 means full search, 6 means diamand search */
4063         me_method = (fei_param->search_path == 1) ? 6 : 4;
4064         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4065             WARN_ONCE("Invalid input ref_width/ref_height in"
4066                       "SearchWindow=0 case! \n");
4067             assert(0);
4068         }
4069         break;
4070
4071     case 1:
4072         /* Tiny - 4 SUs 24x24 window */
4073         ref_width  = 24;
4074         ref_height = 24;
4075         len_sp     = 4;
4076         break;
4077
4078     case 2:
4079         /* Small - 9 SUs 28x28 window */
4080         ref_width  = 28;
4081         ref_height = 28;
4082         len_sp     = 9;
4083         break;
4084     case 3:
4085         /* Diamond - 16 SUs 48x40 window */
4086         ref_width  = 48;
4087         ref_height = 40;
4088         len_sp     = 16;
4089         break;
4090     case 4:
4091         /* Large Diamond - 32 SUs 48x40 window */
4092         ref_width  = 48;
4093         ref_height = 40;
4094         len_sp     = 32;
4095         break;
4096     case 5:
4097         /* Exhaustive - 48 SUs 48x40 window */
4098         ref_width  = 48;
4099         ref_height = 40;
4100         len_sp     = 48;
4101         break;
4102     case 6:
4103         /* Diamond - 16 SUs 64x32 window */
4104         ref_width  = 64;
4105         ref_height = 32;
4106         len_sp     = 16;
4107         break;
4108     case 7:
4109         /* Large Diamond - 32 SUs 64x32 window */
4110         ref_width  = 64;
4111         ref_height = 32;
4112         len_sp     = 32;
4113         break;
4114     case 8:
4115         /* Exhaustive - 48 SUs 64x32 window */
4116         ref_width  = 64;
4117         ref_height = 32;
4118         len_sp     = 48;
4119         break;
4120
4121     default:
4122         assert(0);
4123     }
4124
4125     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4126     if (is_bframe) {
4127         CLIP(ref_width, 4, 32);
4128         CLIP(ref_height, 4, 32);
4129     } else if (is_pframe) {
4130         CLIP(ref_width, 4, 64);
4131         CLIP(ref_height, 4, 32);
4132     }
4133
4134     cmd->dw0.adaptive_enable =
4135         cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4136     cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4137                                           = avc_state->transform_8x8_mode_enable;
4138     cmd->dw2.max_len_sp = len_sp;
4139     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4140     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4141     cmd->dw3.src_access =
4142         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
4143
4144     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4145         if (avc_state->ftq_override) {
4146             cmd->dw3.ft_enable = avc_state->ftq_enable;
4147         } else {
4148             if (generic_state->frame_type == SLICE_TYPE_P) {
4149                 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4150             } else {
4151                 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4152             }
4153         }
4154     } else {
4155         cmd->dw3.ft_enable = 0;
4156     }
4157
4158     if (avc_state->disable_sub_mb_partion)
4159         cmd->dw3.sub_mb_part_mask = 0x7;
4160
4161     if (mbenc_i_frame_dist_in_use) {
4162         /* Fixme: Not supported, no brc in fei */
4163         assert(0);
4164         cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4165         cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4166         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4167         cmd->dw6.batch_buffer_end = 0;
4168         cmd->dw31.intra_compute_type = 1;
4169     }
4170
4171     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4172     cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4173     cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4174                                generic_state->frame_height_in_mbs : avc_state->slice_height;
4175     cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4176     cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4177     cmd->dw3.inter_sad = fei_param->inter_sad;
4178     cmd->dw3.Intra_sad = fei_param->intra_sad;
4179     cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4180     cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4181     cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4182         avc_state->sfd_enable && generic_state->hme_enabled;
4183     cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4184     cmd->dw4.constrained_intra_pred_flag =
4185         pic_param->pic_fields.bits.constrained_intra_pred_flag;
4186     cmd->dw4.hme_enable = 0;
4187     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4188     cmd->dw4.use_actual_ref_qp_value =
4189         generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4190     cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4191     cmd->dw7.src_field_polarity = 0;
4192
4193     /* mv mode cost */
4194     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4195     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4196         // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4197     } else if (avc_state->skip_bias_adjustment_enable) {
4198         // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4199         // No need to check for P picture as the flag is only enabled for P picture
4200         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4201     }
4202
4203     //dw16
4204     /* search path tables */
4205     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4206     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4207
4208     //ftq_skip_threshold_lut set,dw14 /15
4209
4210     //r5 disable NonFTQSkipThresholdLUT
4211     if (generic_state->frame_type == SLICE_TYPE_P) {
4212         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4213     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4214         cmd->dw32.skip_val =
4215             gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4216     }
4217     cmd->dw13.qp_prime_y = qp;
4218     cmd->dw13.qp_prime_cb = qp;
4219     cmd->dw13.qp_prime_cr = qp;
4220     cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
4221
4222     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4223         cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4224         cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4225     } else {
4226         /* disable */
4227         cmd->dw32.mult_pred_l0_disable = 0x80;
4228         cmd->dw32.mult_pred_l1_disable = 0x80;
4229     }
4230     /* no field pic setting, not supported */
4231
4232     //dw34 58
4233     if (avc_state->adaptive_transform_decision_enable) {
4234         if (generic_state->frame_type != SLICE_TYPE_I) {
4235             cmd->dw34.enable_adaptive_tx_decision = 1;
4236         }
4237
4238         cmd->dw58.mb_texture_threshold = 1024;
4239         cmd->dw58.tx_decision_threshold = 128;
4240     }
4241     if (generic_state->frame_type == SLICE_TYPE_B) {
4242         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4243         cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4244         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4245     }
4246     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4247     cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4248     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4249     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4250     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4251     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4252
4253     if (cmd->dw34.force_non_skip_check) {
4254         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4255     }
4256     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4257     cmd->dw38.ref_threshold = 400;
4258     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4259     // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
4260     // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4261     // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4262     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4263     if (mbenc_i_frame_dist_in_use) {
4264         cmd->dw13.qp_prime_y = 0;
4265         cmd->dw13.qp_prime_cb = 0;
4266         cmd->dw13.qp_prime_cr = 0;
4267         cmd->dw33.intra_16x16_nondc_penalty = 0;
4268         cmd->dw33.intra_8x8_nondc_penalty = 0;
4269         cmd->dw33.intra_4x4_nondc_penalty = 0;
4270     }
4271     if (cmd->dw4.use_actual_ref_qp_value) {
4272         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4273         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4274         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4275         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4276         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4277         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4278         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4279         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4280         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4281         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4282     }
4283
4284     table_idx = slice_type_kernel[generic_state->frame_type];
4285     cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
4286
4287     if (generic_state->frame_type == SLICE_TYPE_I) {
4288         cmd->dw0.skip_mode_enable = 0;
4289         cmd->dw37.skip_mode_enable = 0;
4290         cmd->dw36.hme_combine_overlap = 0;
4291         cmd->dw36.check_all_fractional_enable = 0;
4292         cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4293         cmd->dw34.enable_direct_bias_adjustment = 0;
4294         cmd->dw34.enable_global_motion_bias_adjustment = 0;
4295
4296     } else if (generic_state->frame_type == SLICE_TYPE_P) {
4297         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4298         cmd->dw3.bme_disable_fbr = 1;
4299         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4300         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4301         cmd->dw7.non_skip_zmv_added = 1;
4302         cmd->dw7.non_skip_mode_added = 1;
4303         cmd->dw7.skip_center_mask = 1;
4304
4305         cmd->dw47.intra_cost_sf =
4306             (avc_state->adaptive_intra_scaling_enable) ?
4307             gen9_avc_adaptive_intra_scaling_factor[preset] :
4308             gen9_avc_intra_scaling_factor[preset];
4309
4310         cmd->dw47.max_vmv_r =
4311             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4312
4313         cmd->dw36.hme_combine_overlap = 1;
4314         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4315         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4316         cmd->dw34.enable_direct_bias_adjustment = 0;
4317         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4318         if (avc_state->global_motion_bias_adjustment_enable)
4319             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4320
4321         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4322
4323     } else { /* B slice */
4324
4325         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4326         cmd->dw1.bi_Weight = avc_state->bi_weight;
4327         cmd->dw3.search_ctrl = 7;
4328         cmd->dw3.skip_type = 1;
4329         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4330         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4331         cmd->dw7.skip_center_mask = 0xff;
4332
4333         cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4334                                   gen9_avc_adaptive_intra_scaling_factor[qp] :
4335                                   gen9_avc_intra_scaling_factor[qp];
4336
4337         cmd->dw47.max_vmv_r =
4338             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4339
4340         cmd->dw36.hme_combine_overlap = 1;
4341
4342         //check is_fwd_frame_short_term_ref
4343         surface_id = slice_param->RefPicList1[0].picture_id;
4344         obj_surface = SURFACE(surface_id);
4345         if (!obj_surface) {
4346             WARN_ONCE("Invalid backward reference frame\n");
4347             if (gpe_context)
4348                 i965_gpe_context_unmap_curbe(gpe_context);
4349             return;
4350         }
4351         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4352
4353         cmd->dw36.num_ref_idx_l0_minus_one =
4354             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4355             : 0;
4356         cmd->dw36.num_ref_idx_l1_minus_one =
4357             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4358             : 0;
4359         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4360
4361         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4362         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4363         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4364         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4365         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4366         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4367         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4368         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4369
4370         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4371         if (cmd->dw34.enable_direct_bias_adjustment) {
4372             cmd->dw7.non_skip_mode_added = 1;
4373             cmd->dw7.non_skip_zmv_added = 1;
4374         }
4375
4376         cmd->dw34.enable_global_motion_bias_adjustment =
4377             avc_state->global_motion_bias_adjustment_enable;
4378         if (avc_state->global_motion_bias_adjustment_enable)
4379             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4380
4381         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4382         cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4383     }
4384
4385     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4386
4387     if (avc_state->rolling_intra_refresh_enable) {
4388         //Not supported
4389         cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4390
4391     } else {
4392         cmd->dw34.widi_intra_refresh_en = 0;
4393     }
4394     cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4395     cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4396
4397     /* Fixme: Skipped ROI stuffs for now */
4398
4399     /* r64: FEI specific fields */
4400     cmd->dw64.fei_enable = 1;
4401     cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4402     if (fei_param->distortion != VA_INVALID_ID)
4403         cmd->dw64.vme_distortion_output_enable = 1;
4404     cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4405     cmd->dw64.mb_input_enable = fei_param->mb_input;
4406
4407     // FEI mode is disabled when external MVP is available
4408     if (fei_param->mv_predictor_enable)
4409         cmd->dw64.fei_mode = 0;
4410     else
4411         cmd->dw64.fei_mode = 1;
4412
4413     cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4414     cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4415     cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4416     cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4417     cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4418     cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4419     cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4420     cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4421     cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4422     cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4423     cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4424     cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4425     cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4426     cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4427     cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4428     cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4429     cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4430     cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4431     cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4432     cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4433     cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4434     cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4435     cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4436     i965_gpe_context_unmap_curbe(gpe_context);
4437
4438     return;
4439 }
4440
4441 static void
4442 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
4443                             struct encode_state *encode_state,
4444                             struct i965_gpe_context *gpe_context,
4445                             struct intel_encoder_context *encoder_context,
4446                             void * param_mbenc)
4447 {
4448     struct i965_driver_data *i965 = i965_driver_data(ctx);
4449     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4450     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4451     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4452     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4453     struct object_surface *obj_surface;
4454     struct gen9_surface_avc *avc_priv_surface;
4455     struct i965_gpe_resource *gpe_resource;
4456     struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
4457     VASurfaceID surface_id;
4458     unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
4459     unsigned int size = 0;
4460     unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
4461                                  generic_state->frame_height_in_mbs;
4462     int i = 0;
4463     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4464     unsigned char is_g95 = 0;
4465
4466     if (IS_SKL(i965->intel.device_info) ||
4467         IS_BXT(i965->intel.device_info))
4468         is_g95 = 0;
4469     else if (IS_KBL(i965->intel.device_info) ||
4470              IS_GEN10(i965->intel.device_info) ||
4471              IS_GLK(i965->intel.device_info))
4472         is_g95 = 1;
4473
4474     obj_surface = encode_state->reconstructed_object;
4475
4476     if (!obj_surface || !obj_surface->private_data)
4477         return;
4478     avc_priv_surface = obj_surface->private_data;
4479
4480     /*pak obj command buffer output*/
4481     size = frame_mb_size * 16 * 4;
4482     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4483     i965_add_buffer_gpe_surface(ctx,
4484                                 gpe_context,
4485                                 gpe_resource,
4486                                 0,
4487                                 size / 4,
4488                                 0,
4489                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4490
4491     /*mv data buffer output*/
4492     size = frame_mb_size * 32 * 4;
4493     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4494     i965_add_buffer_gpe_surface(ctx,
4495                                 gpe_context,
4496                                 gpe_resource,
4497                                 0,
4498                                 size / 4,
4499                                 0,
4500                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4501
4502     /*input current  YUV surface, current input Y/UV object*/
4503     if (mbenc_i_frame_dist_in_use) {
4504         obj_surface = encode_state->reconstructed_object;
4505         if (!obj_surface || !obj_surface->private_data)
4506             return;
4507         avc_priv_surface = obj_surface->private_data;
4508         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4509     } else {
4510         obj_surface = encode_state->input_yuv_object;
4511     }
4512     i965_add_2d_gpe_surface(ctx,
4513                             gpe_context,
4514                             obj_surface,
4515                             0,
4516                             1,
4517                             I965_SURFACEFORMAT_R8_UNORM,
4518                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4519
4520     i965_add_2d_gpe_surface(ctx,
4521                             gpe_context,
4522                             obj_surface,
4523                             1,
4524                             1,
4525                             I965_SURFACEFORMAT_R16_UINT,
4526                             GEN9_AVC_MBENC_CURR_UV_INDEX);
4527
4528     if (generic_state->hme_enabled) {
4529         /*memv input 4x*/
4530         if (!IS_GEN8(i965->intel.device_info)) {
4531             gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4532             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4533                                            gpe_resource,
4534                                            1,
4535                                            I965_SURFACEFORMAT_R8_UNORM,
4536                                            GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4537             /* memv distortion input*/
4538             gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4539             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4540                                            gpe_resource,
4541                                            1,
4542                                            I965_SURFACEFORMAT_R8_UNORM,
4543                                            GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4544         } else if (generic_state->frame_type != SLICE_TYPE_I) {
4545             gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4546             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4547                                            gpe_resource,
4548                                            1,
4549                                            I965_SURFACEFORMAT_R8_UNORM,
4550                                            GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4551             /* memv distortion input*/
4552             gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4553             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4554                                            gpe_resource,
4555                                            1,
4556                                            I965_SURFACEFORMAT_R8_UNORM,
4557                                            GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4558         }
4559     }
4560
4561     /*mbbrc const data_buffer*/
4562     if (param->mb_const_data_buffer_in_use) {
4563         size = 16 * AVC_QP_MAX * sizeof(unsigned int);
4564         gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
4565         i965_add_buffer_gpe_surface(ctx,
4566                                     gpe_context,
4567                                     gpe_resource,
4568                                     0,
4569                                     size / 4,
4570                                     0,
4571                                     GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
4572
4573     }
4574
4575     /*mb qp data_buffer*/
4576     if (param->mb_qp_buffer_in_use) {
4577         if (avc_state->mb_qp_data_enable)
4578             gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
4579         else
4580             gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
4581         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4582                                        gpe_resource,
4583                                        1,
4584                                        I965_SURFACEFORMAT_R8_UNORM,
4585                                        GEN9_AVC_MBENC_MBQP_INDEX);
4586     }
4587
4588     /*input current  YUV surface, current input Y/UV object*/
4589     if (mbenc_i_frame_dist_in_use) {
4590         obj_surface = encode_state->reconstructed_object;
4591         if (!obj_surface || !obj_surface->private_data)
4592             return;
4593         avc_priv_surface = obj_surface->private_data;
4594         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4595     } else {
4596         obj_surface = encode_state->input_yuv_object;
4597     }
4598     i965_add_adv_gpe_surface(ctx, gpe_context,
4599                              obj_surface,
4600                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
4601     /*input ref YUV surface*/
4602     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4603         surface_id = slice_param->RefPicList0[i].picture_id;
4604         obj_surface = SURFACE(surface_id);
4605         if (!obj_surface || !obj_surface->private_data)
4606             break;
4607
4608         i965_add_adv_gpe_surface(ctx, gpe_context,
4609                                  obj_surface,
4610                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
4611     }
4612     /*input current  YUV surface, current input Y/UV object*/
4613     if (mbenc_i_frame_dist_in_use) {
4614         obj_surface = encode_state->reconstructed_object;
4615         if (!obj_surface || !obj_surface->private_data)
4616             return;
4617         avc_priv_surface = obj_surface->private_data;
4618         obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4619     } else {
4620         obj_surface = encode_state->input_yuv_object;
4621     }
4622     i965_add_adv_gpe_surface(ctx, gpe_context,
4623                              obj_surface,
4624                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
4625
4626     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4627         if (i > 0) break; // only  one ref supported here for B frame
4628         surface_id = slice_param->RefPicList1[i].picture_id;
4629         obj_surface = SURFACE(surface_id);
4630         if (!obj_surface || !obj_surface->private_data)
4631             break;
4632
4633         i965_add_adv_gpe_surface(ctx, gpe_context,
4634                                  obj_surface,
4635                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
4636         i965_add_adv_gpe_surface(ctx, gpe_context,
4637                                  obj_surface,
4638                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
4639         if (i == 0) {
4640             avc_priv_surface = obj_surface->private_data;
4641             /*pak obj command buffer output(mb code)*/
4642             size = frame_mb_size * 16 * 4;
4643             gpe_resource = &avc_priv_surface->res_mb_code_surface;
4644             i965_add_buffer_gpe_surface(ctx,
4645                                         gpe_context,
4646                                         gpe_resource,
4647                                         0,
4648                                         size / 4,
4649                                         0,
4650                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
4651
4652             /*mv data buffer output*/
4653             size = frame_mb_size * 32 * 4;
4654             gpe_resource = &avc_priv_surface->res_mv_data_surface;
4655             i965_add_buffer_gpe_surface(ctx,
4656                                         gpe_context,
4657                                         gpe_resource,
4658                                         0,
4659                                         size / 4,
4660                                         0,
4661                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
4662
4663         }
4664
4665         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
4666             i965_add_adv_gpe_surface(ctx, gpe_context,
4667                                      obj_surface,
4668                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
4669         }
4670
4671     }
4672
4673     /* BRC distortion data buffer for I frame*/
4674     if (mbenc_i_frame_dist_in_use) {
4675         gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
4676         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4677                                        gpe_resource,
4678                                        1,
4679                                        I965_SURFACEFORMAT_R8_UNORM,
4680                                        GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
4681     }
4682
4683     /* as ref frame ,update later RefPicSelect of Current Picture*/
4684     obj_surface = encode_state->reconstructed_object;
4685     avc_priv_surface = obj_surface->private_data;
4686     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
4687         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
4688         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4689                                        gpe_resource,
4690                                        1,
4691                                        I965_SURFACEFORMAT_R8_UNORM,
4692                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
4693
4694     }
4695     if (!IS_GEN8(i965->intel.device_info)) {
4696         if (param->mb_vproc_stats_enable) {
4697             /*mb status buffer input*/
4698             size = frame_mb_size * 16 * 4;
4699             gpe_resource = &(avc_ctx->res_mb_status_buffer);
4700             i965_add_buffer_gpe_surface(ctx,
4701                                         gpe_context,
4702                                         gpe_resource,
4703                                         0,
4704                                         size / 4,
4705                                         0,
4706                                         GEN9_AVC_MBENC_MB_STATS_INDEX);
4707
4708         } else if (avc_state->flatness_check_enable) {
4709             gpe_resource = &(avc_ctx->res_flatness_check_surface);
4710             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4711                                            gpe_resource,
4712                                            1,
4713                                            I965_SURFACEFORMAT_R8_UNORM,
4714                                            GEN9_AVC_MBENC_MB_STATS_INDEX);
4715         }
4716     } else if (avc_state->flatness_check_enable) {
4717         gpe_resource = &(avc_ctx->res_flatness_check_surface);
4718         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4719                                        gpe_resource,
4720                                        1,
4721                                        I965_SURFACEFORMAT_R8_UNORM,
4722                                        GEN9_AVC_MBENC_MB_STATS_INDEX);
4723     }
4724
4725     if (param->mad_enable) {
4726         /*mad buffer input*/
4727         size = 4;
4728         gpe_resource = &(avc_ctx->res_mad_data_buffer);
4729         i965_add_buffer_gpe_surface(ctx,
4730                                     gpe_context,
4731                                     gpe_resource,
4732                                     0,
4733                                     size / 4,
4734                                     0,
4735                                     GEN9_AVC_MBENC_MAD_DATA_INDEX);
4736         i965_zero_gpe_resource(gpe_resource);
4737     }
4738
4739     /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
4740     if (avc_state->mbenc_brc_buffer_size > 0) {
4741         size = avc_state->mbenc_brc_buffer_size;
4742         gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
4743         i965_add_buffer_gpe_surface(ctx,
4744                                     gpe_context,
4745                                     gpe_resource,
4746                                     0,
4747                                     size / 4,
4748                                     0,
4749                                     GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
4750     }
4751
4752     /*artitratry num mbs in slice*/
4753     if (avc_state->arbitrary_num_mbs_in_slice) {
4754         /*slice surface input*/
4755         gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
4756         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4757                                        gpe_resource,
4758                                        1,
4759                                        I965_SURFACEFORMAT_R8_UNORM,
4760                                        GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
4761         gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
4762     }
4763
4764     /* BRC distortion data buffer for I frame */
4765     if (!mbenc_i_frame_dist_in_use) {
4766         if (avc_state->mb_disable_skip_map_enable) {
4767             gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
4768             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4769                                            gpe_resource,
4770                                            1,
4771                                            I965_SURFACEFORMAT_R8_UNORM,
4772                                            (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
4773         }
4774         if (IS_GEN8(i965->intel.device_info)) {
4775             if (avc_state->sfd_enable) {
4776                 size = 128 / sizeof(unsigned long);
4777                 gpe_resource = &(avc_ctx->res_sfd_output_buffer);
4778                 i965_add_buffer_gpe_surface(ctx,
4779                                             gpe_context,
4780                                             gpe_resource,
4781                                             0,
4782                                             size / 4,
4783                                             0,
4784                                             GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
4785
4786             }
4787         } else {
4788             if (avc_state->sfd_enable && generic_state->hme_enabled) {
4789                 if (generic_state->frame_type == SLICE_TYPE_P) {
4790                     gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
4791                 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4792                     gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
4793                 }
4794                 if (generic_state->frame_type != SLICE_TYPE_I) {
4795                     size = 64;
4796                     i965_add_buffer_gpe_surface(ctx,
4797                                                 gpe_context,
4798                                                 gpe_resource,
4799                                                 0,
4800                                                 size / 4,
4801                                                 0,
4802                                                 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
4803
4804
4805                 }
4806             }
4807         }
4808     }
4809     return;
4810 }
4811
4812 static void
4813 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4814                                 struct encode_state *encode_state,
4815                                 struct i965_gpe_context *gpe_context,
4816                                 struct intel_encoder_context *encoder_context,
4817                                 void * param_mbenc)
4818 {
4819     struct i965_driver_data *i965 = i965_driver_data(ctx);
4820     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4821     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4822     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4823     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4824     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4825     struct object_buffer *obj_buffer = NULL;
4826     struct buffer_store *buffer_store = NULL;
4827     struct object_surface *obj_surface = NULL;
4828     struct gen9_surface_avc *avc_priv_surface;
4829     struct i965_gpe_resource *gpe_resource;
4830     VASurfaceID surface_id;
4831     unsigned int size = 0;
4832     unsigned int frame_mb_nums;
4833     int i = 0, allocate_flag = 1;
4834
4835     obj_surface = encode_state->reconstructed_object;
4836     if (!obj_surface || !obj_surface->private_data)
4837         return;
4838     avc_priv_surface = obj_surface->private_data;
4839
4840     frame_mb_nums = generic_state->frame_width_in_mbs *
4841                     generic_state->frame_height_in_mbs;
4842     fei_param = avc_state->fei_framectl_param;
4843
4844     assert(fei_param != NULL);
4845
4846     /* res_mb_code_surface for MB code */
4847     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4848     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4849         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4850     if (fei_param->mb_code_data != VA_INVALID_ID) {
4851         obj_buffer = BUFFER(fei_param->mb_code_data);
4852         assert(obj_buffer != NULL);
4853         buffer_store = obj_buffer->buffer_store;
4854         assert(size <= buffer_store->bo->size);
4855         i965_dri_object_to_buffer_gpe_resource(
4856             &avc_priv_surface->res_mb_code_surface,
4857             buffer_store->bo);
4858     } else {
4859         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4860                                                    &avc_priv_surface->res_mb_code_surface,
4861                                                    ALIGN(size, 0x1000),
4862                                                    "mb code buffer");
4863         assert(allocate_flag != 0);
4864     }
4865
4866     /* res_mv_data_surface for MV data */
4867     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4868     if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4869         i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4870     if (fei_param->mv_data != VA_INVALID_ID) {
4871         obj_buffer = BUFFER(fei_param->mv_data);
4872         assert(obj_buffer != NULL);
4873         buffer_store = obj_buffer->buffer_store;
4874         assert(size <= buffer_store->bo->size);
4875         i965_dri_object_to_buffer_gpe_resource(
4876             &avc_priv_surface->res_mv_data_surface,
4877             buffer_store->bo);
4878     } else {
4879         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4880                                                    &avc_priv_surface->res_mv_data_surface,
4881                                                    ALIGN(size, 0x1000),
4882                                                    "mv data buffer");
4883         assert(allocate_flag != 0);
4884     }
4885
4886     /* fei mb control data surface */
4887     size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4888     if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4889         assert(fei_param->mb_ctrl != VA_INVALID_ID);
4890         obj_buffer = BUFFER(fei_param->mb_ctrl);
4891         assert(obj_buffer != NULL);
4892         buffer_store = obj_buffer->buffer_store;
4893         assert(size <= buffer_store->bo->size);
4894         if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4895             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4896         i965_dri_object_to_buffer_gpe_resource(
4897             &avc_priv_surface->res_fei_mb_cntrl_surface,
4898             buffer_store->bo);
4899     }
4900
4901     /* fei mv predictor surface*/
4902     size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4903     if (fei_param->mv_predictor_enable &&
4904         (fei_param->mv_predictor != VA_INVALID_ID)) {
4905         obj_buffer = BUFFER(fei_param->mv_predictor);
4906         assert(obj_buffer != NULL);
4907         buffer_store = obj_buffer->buffer_store;
4908         assert(size <= buffer_store->bo->size);
4909         if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4910             i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4911         i965_dri_object_to_buffer_gpe_resource(
4912             &avc_priv_surface->res_fei_mv_predictor_surface,
4913             buffer_store->bo);
4914     } else {
4915         if (fei_param->mv_predictor_enable)
4916             assert(fei_param->mv_predictor != VA_INVALID_ID);
4917     }
4918
4919     /* fei vme distortion */
4920     size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4921     if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4922         i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4923     if (fei_param->distortion != VA_INVALID_ID) {
4924         obj_buffer = BUFFER(fei_param->distortion);
4925         assert(obj_buffer != NULL);
4926         buffer_store = obj_buffer->buffer_store;
4927         assert(size <= buffer_store->bo->size);
4928         i965_dri_object_to_buffer_gpe_resource(
4929             &avc_priv_surface->res_fei_vme_distortion_surface,
4930             buffer_store->bo);
4931     } else {
4932         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4933                                                    &avc_priv_surface->res_fei_vme_distortion_surface,
4934                                                    ALIGN(size, 0x1000),
4935                                                    "fei vme distortion");
4936         assert(allocate_flag != 0);
4937     }
4938
4939     /* fei mb qp  */
4940     /* Fixme/Confirm:  not sure why we need 3 byte padding here */
4941     size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4942     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4943         obj_buffer = BUFFER(fei_param->qp);
4944         assert(obj_buffer != NULL);
4945         buffer_store = obj_buffer->buffer_store;
4946         assert((size - 3) <= buffer_store->bo->size);
4947         if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4948             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4949         i965_dri_object_to_buffer_gpe_resource(
4950             &avc_priv_surface->res_fei_mb_qp_surface,
4951             buffer_store->bo);
4952     } else {
4953         if (fei_param->mb_qp)
4954             assert(fei_param->qp != VA_INVALID_ID);
4955     }
4956
4957     /*==== pak obj command buffer output ====*/
4958     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4959     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4960     i965_add_buffer_gpe_surface(ctx,
4961                                 gpe_context,
4962                                 gpe_resource,
4963                                 0,
4964                                 size / 4,
4965                                 0,
4966                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4967
4968
4969     /*=== mv data buffer output */
4970     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4971     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4972     i965_add_buffer_gpe_surface(ctx,
4973                                 gpe_context,
4974                                 gpe_resource,
4975                                 0,
4976                                 size / 4,
4977                                 0,
4978                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4979
4980
4981     /* === current input Y (binding table offset = 3)=== */
4982     obj_surface = encode_state->input_yuv_object;
4983     i965_add_2d_gpe_surface(ctx,
4984                             gpe_context,
4985                             obj_surface,
4986                             0,
4987                             1,
4988                             I965_SURFACEFORMAT_R8_UNORM,
4989                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4990
4991     /* === current input UV === (binding table offset == 4)*/
4992     i965_add_2d_gpe_surface(ctx,
4993                             gpe_context,
4994                             obj_surface,
4995                             1,
4996                             1,
4997                             I965_SURFACEFORMAT_R16_UINT,
4998                             GEN9_AVC_MBENC_CURR_UV_INDEX);
4999
5000     /* === input current YUV surface, (binding table offset == 15) === */
5001     i965_add_adv_gpe_surface(ctx, gpe_context,
5002                              obj_surface,
5003                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5004
5005
5006     /*== input current  YUV surface, (binding table offset == 32)*/
5007     i965_add_adv_gpe_surface(ctx, gpe_context,
5008                              obj_surface,
5009                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5010
5011     /* list 0 references */
5012     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5013
5014         surface_id = slice_param->RefPicList0[i].picture_id;
5015         obj_surface = SURFACE(surface_id);
5016         if (!obj_surface || !obj_surface->private_data)
5017             break;
5018         i965_add_adv_gpe_surface(ctx, gpe_context,
5019                                  obj_surface,
5020                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5021     }
5022
5023
5024     /* list 1 references */
5025     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5026         if (i > 0) break; // only  one ref supported here for B frame
5027         surface_id = slice_param->RefPicList1[i].picture_id;
5028         obj_surface = SURFACE(surface_id);
5029         if (!obj_surface || !obj_surface->private_data)
5030             break;
5031
5032         i965_add_adv_gpe_surface(ctx, gpe_context,
5033                                  obj_surface,
5034                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5035         if (i == 0) {
5036             avc_priv_surface = obj_surface->private_data;
5037             /* mb code of Backward reference frame */
5038             size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5039             gpe_resource = &avc_priv_surface->res_mb_code_surface;
5040             i965_add_buffer_gpe_surface(ctx,
5041                                         gpe_context,
5042                                         gpe_resource,
5043                                         0,
5044                                         size / 4,
5045                                         0,
5046                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5047
5048             /* mv data of backward ref frame */
5049             size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5050             gpe_resource = &avc_priv_surface->res_mv_data_surface;
5051             i965_add_buffer_gpe_surface(ctx,
5052                                         gpe_context,
5053                                         gpe_resource,
5054                                         0,
5055                                         size / 4,
5056                                         0,
5057                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5058
5059         }
5060         //again
5061         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5062             i965_add_adv_gpe_surface(ctx, gpe_context,
5063                                      obj_surface,
5064                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5065         }
5066     }
5067
5068     /* as ref frame ,update later RefPicSelect of Current Picture*/
5069     obj_surface = encode_state->reconstructed_object;
5070     avc_priv_surface = obj_surface->private_data;
5071     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5072         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5073         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5074                                        gpe_resource,
5075                                        1,
5076                                        I965_SURFACEFORMAT_R8_UNORM,
5077                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5078
5079     }
5080
5081
5082     /* mb specific data, macroblock control parameters */
5083     if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5084         (fei_param->mb_ctrl != VA_INVALID_ID)) {
5085         size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5086         gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5087         i965_add_buffer_gpe_surface(ctx,
5088                                     gpe_context,
5089                                     gpe_resource,
5090                                     0,
5091                                     size / 4,
5092                                     0,
5093                                     GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5094     }
5095
5096     /* multi mv predictor surface */
5097     if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5098         size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5099         gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5100         i965_add_buffer_gpe_surface(ctx,
5101                                     gpe_context,
5102                                     gpe_resource,
5103                                     0,
5104                                     size / 4,
5105                                     0,
5106                                     GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5107     }
5108
5109     /* mb qp */
5110     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5111         size = frame_mb_nums  + 3;
5112         gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5113         i965_add_buffer_gpe_surface(ctx,
5114                                     gpe_context,
5115                                     gpe_resource,
5116                                     0,
5117                                     size / 4,
5118                                     0,
5119                                     GEN9_AVC_MBENC_MBQP_INDEX);
5120     }
5121
5122
5123     /*=== FEI distortion surface ====*/
5124     size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5125     gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5126     i965_add_buffer_gpe_surface(ctx,
5127                                 gpe_context,
5128                                 gpe_resource,
5129                                 0,
5130                                 size / 4,
5131                                 0,
5132                                 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
5133
5134     return;
5135 }
5136
/*
 * Run the MBEnc (macroblock encode) GPE kernel for the current frame, or the
 * BRC I-frame distortion kernel when i_frame_dist_in_use is set.
 *
 * Selects the kernel/media function from the kernel mode (normal /
 * performance / quality, or the FEI variant), frame type and distortion
 * mode, then performs the standard GPE sequence: init context, reset
 * binding table, program CURBE, load constant data, bind surfaces, set up
 * interface data, and finally launch a media-object walker sized to the
 * frame (or 4x-downscaled frame) in macroblocks.
 *
 * NOTE(review): the setup steps below are order-dependent; do not reorder.
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty-ROI is deliberately disabled here (trailing "&& (0)") */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* any of these features requires the per-MB BRC constant data buffer */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion pass: runs on the 4x downscaled frame
         * with MAD and BRC forced off */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* pick the I-frame kernel variant by quality/speed preset ... */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* FEI mode overrides the preset selection */
        if (encoder_context->fei_enabled) {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
        }

        /* ... then offset to the P/B variant (kernels are laid out I,P,B) */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* the BRC-update kernel may already have programmed this context's
     * CURBE; only (re)initialize when it has not */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // caculate the lambda table, it is kernel controlled trellis quantization,gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        /* distortion pass has no inter-MB dependency */
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker pattern follows the MB dependency of the slice type */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
5307
5308 /*
5309 me kernle related function
5310 */
5311 static void
5312 gen9_avc_set_curbe_me(VADriverContextP ctx,
5313                       struct encode_state *encode_state,
5314                       struct i965_gpe_context *gpe_context,
5315                       struct intel_encoder_context *encoder_context,
5316                       void * param)
5317 {
5318     gen9_avc_me_curbe_data *curbe_cmd;
5319     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5320     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5321     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5322
5323     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5324
5325     struct me_param * curbe_param = (struct me_param *)param ;
5326     unsigned char  use_mv_from_prev_step = 0;
5327     unsigned char write_distortions = 0;
5328     unsigned char qp_prime_y = 0;
5329     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5330     unsigned char seach_table_idx = 0;
5331     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5332     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5333     unsigned int scale_factor = 0;
5334
5335     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5336     switch (curbe_param->hme_type) {
5337     case INTEL_ENC_HME_4x : {
5338         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5339         write_distortions = 1;
5340         mv_shift_factor = 2;
5341         scale_factor = 4;
5342         prev_mv_read_pos_factor = 0;
5343         break;
5344     }
5345     case INTEL_ENC_HME_16x : {
5346         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5347         write_distortions = 0;
5348         mv_shift_factor = 2;
5349         scale_factor = 16;
5350         prev_mv_read_pos_factor = 1;
5351         break;
5352     }
5353     case INTEL_ENC_HME_32x : {
5354         use_mv_from_prev_step = 0;
5355         write_distortions = 0;
5356         mv_shift_factor = 1;
5357         scale_factor = 32;
5358         prev_mv_read_pos_factor = 0;
5359         break;
5360     }
5361     default:
5362         assert(0);
5363
5364     }
5365     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5366
5367     if (!curbe_cmd)
5368         return;
5369
5370     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5371     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5372
5373     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5374
5375     curbe_cmd->dw3.sub_pel_mode = 3;
5376     if (avc_state->field_scaling_output_interleaved) {
5377         /*frame set to zero,field specified*/
5378         curbe_cmd->dw3.src_access = 0;
5379         curbe_cmd->dw3.ref_access = 0;
5380         curbe_cmd->dw7.src_field_polarity = 0;
5381     }
5382     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5383     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5384     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5385
5386     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5387     curbe_cmd->dw6.write_distortions = write_distortions;
5388     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5389     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5390
5391     if (generic_state->frame_type == SLICE_TYPE_B) {
5392         curbe_cmd->dw1.bi_weight = 32;
5393         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5394         me_method = gen9_avc_b_me_method[generic_state->preset];
5395         seach_table_idx = 1;
5396     }
5397
5398     if (generic_state->frame_type == SLICE_TYPE_P ||
5399         generic_state->frame_type == SLICE_TYPE_B)
5400         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5401
5402     curbe_cmd->dw13.ref_streamin_cost = 5;
5403     curbe_cmd->dw13.roi_enable = 0;
5404
5405     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5406     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
5407
5408     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5409
5410     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5411     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5412     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5413     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5414     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5415     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5416     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5417
5418     i965_gpe_context_unmap_curbe(gpe_context);
5419     return;
5420 }
5421
5422 static void
5423 gen9_avc_send_surface_me(VADriverContextP ctx,
5424                          struct encode_state *encode_state,
5425                          struct i965_gpe_context *gpe_context,
5426                          struct intel_encoder_context *encoder_context,
5427                          void * param)
5428 {
5429     struct i965_driver_data *i965 = i965_driver_data(ctx);
5430
5431     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5432     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5433     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5434     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5435
5436     struct object_surface *obj_surface, *input_surface;
5437     struct gen9_surface_avc *avc_priv_surface;
5438     struct i965_gpe_resource *gpe_resource;
5439     struct me_param * curbe_param = (struct me_param *)param ;
5440
5441     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5442     VASurfaceID surface_id;
5443     int i = 0;
5444
5445     /* all scaled input surface stored in reconstructed_object*/
5446     obj_surface = encode_state->reconstructed_object;
5447     if (!obj_surface || !obj_surface->private_data)
5448         return;
5449     avc_priv_surface = obj_surface->private_data;
5450
5451
5452     switch (curbe_param->hme_type) {
5453     case INTEL_ENC_HME_4x : {
5454         /*memv output 4x*/
5455         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5456         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5457                                        gpe_resource,
5458                                        1,
5459                                        I965_SURFACEFORMAT_R8_UNORM,
5460                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5461
5462         /*memv input 16x*/
5463         if (generic_state->b16xme_enabled) {
5464             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5465             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5466                                            gpe_resource,
5467                                            1,
5468                                            I965_SURFACEFORMAT_R8_UNORM,
5469                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5470         }
5471         /* brc distortion  output*/
5472         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5473         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5474                                        gpe_resource,
5475                                        1,
5476                                        I965_SURFACEFORMAT_R8_UNORM,
5477                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5478         /* memv distortion output*/
5479         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5480         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5481                                        gpe_resource,
5482                                        1,
5483                                        I965_SURFACEFORMAT_R8_UNORM,
5484                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5485         /*input current down scaled YUV surface*/
5486         obj_surface = encode_state->reconstructed_object;
5487         avc_priv_surface = obj_surface->private_data;
5488         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5489         i965_add_adv_gpe_surface(ctx, gpe_context,
5490                                  input_surface,
5491                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5492         /*input ref scaled YUV surface*/
5493         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5494             surface_id = slice_param->RefPicList0[i].picture_id;
5495             obj_surface = SURFACE(surface_id);
5496             if (!obj_surface || !obj_surface->private_data)
5497                 break;
5498             avc_priv_surface = obj_surface->private_data;
5499
5500             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5501
5502             i965_add_adv_gpe_surface(ctx, gpe_context,
5503                                      input_surface,
5504                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5505         }
5506
5507         obj_surface = encode_state->reconstructed_object;
5508         avc_priv_surface = obj_surface->private_data;
5509         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5510
5511         i965_add_adv_gpe_surface(ctx, gpe_context,
5512                                  input_surface,
5513                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5514
5515         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5516             surface_id = slice_param->RefPicList1[i].picture_id;
5517             obj_surface = SURFACE(surface_id);
5518             if (!obj_surface || !obj_surface->private_data)
5519                 break;
5520             avc_priv_surface = obj_surface->private_data;
5521
5522             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5523
5524             i965_add_adv_gpe_surface(ctx, gpe_context,
5525                                      input_surface,
5526                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5527         }
5528         break;
5529
5530     }
5531     case INTEL_ENC_HME_16x : {
5532         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5533         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5534                                        gpe_resource,
5535                                        1,
5536                                        I965_SURFACEFORMAT_R8_UNORM,
5537                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5538
5539         if (generic_state->b32xme_enabled) {
5540             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5541             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5542                                            gpe_resource,
5543                                            1,
5544                                            I965_SURFACEFORMAT_R8_UNORM,
5545                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5546         }
5547
5548         obj_surface = encode_state->reconstructed_object;
5549         avc_priv_surface = obj_surface->private_data;
5550         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5551         i965_add_adv_gpe_surface(ctx, gpe_context,
5552                                  input_surface,
5553                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5554
5555         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5556             surface_id = slice_param->RefPicList0[i].picture_id;
5557             obj_surface = SURFACE(surface_id);
5558             if (!obj_surface || !obj_surface->private_data)
5559                 break;
5560             avc_priv_surface = obj_surface->private_data;
5561
5562             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5563
5564             i965_add_adv_gpe_surface(ctx, gpe_context,
5565                                      input_surface,
5566                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5567         }
5568
5569         obj_surface = encode_state->reconstructed_object;
5570         avc_priv_surface = obj_surface->private_data;
5571         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5572
5573         i965_add_adv_gpe_surface(ctx, gpe_context,
5574                                  input_surface,
5575                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5576
5577         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5578             surface_id = slice_param->RefPicList1[i].picture_id;
5579             obj_surface = SURFACE(surface_id);
5580             if (!obj_surface || !obj_surface->private_data)
5581                 break;
5582             avc_priv_surface = obj_surface->private_data;
5583
5584             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5585
5586             i965_add_adv_gpe_surface(ctx, gpe_context,
5587                                      input_surface,
5588                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5589         }
5590         break;
5591     }
5592     case INTEL_ENC_HME_32x : {
5593         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5594         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5595                                        gpe_resource,
5596                                        1,
5597                                        I965_SURFACEFORMAT_R8_UNORM,
5598                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5599
5600         obj_surface = encode_state->reconstructed_object;
5601         avc_priv_surface = obj_surface->private_data;
5602         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5603         i965_add_adv_gpe_surface(ctx, gpe_context,
5604                                  input_surface,
5605                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5606
5607         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5608             surface_id = slice_param->RefPicList0[i].picture_id;
5609             obj_surface = SURFACE(surface_id);
5610             if (!obj_surface || !obj_surface->private_data)
5611                 break;
5612             avc_priv_surface = obj_surface->private_data;
5613
5614             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5615
5616             i965_add_adv_gpe_surface(ctx, gpe_context,
5617                                      input_surface,
5618                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5619         }
5620
5621         obj_surface = encode_state->reconstructed_object;
5622         avc_priv_surface = obj_surface->private_data;
5623         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5624
5625         i965_add_adv_gpe_surface(ctx, gpe_context,
5626                                  input_surface,
5627                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5628
5629         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5630             surface_id = slice_param->RefPicList1[i].picture_id;
5631             obj_surface = SURFACE(surface_id);
5632             if (!obj_surface || !obj_surface->private_data)
5633                 break;
5634             avc_priv_surface = obj_surface->private_data;
5635
5636             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5637
5638             i965_add_adv_gpe_surface(ctx, gpe_context,
5639                                      input_surface,
5640                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5641         }
5642         break;
5643     }
5644     default:
5645         assert(0);
5646
5647     }
5648 }
5649
5650 static VAStatus
5651 gen9_avc_kernel_me(VADriverContextP ctx,
5652                    struct encode_state *encode_state,
5653                    struct intel_encoder_context *encoder_context,
5654                    int hme_type)
5655 {
5656     struct i965_driver_data *i965 = i965_driver_data(ctx);
5657     struct i965_gpe_table *gpe = &i965->gpe_table;
5658     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5659     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5660     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5661     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5662
5663     struct i965_gpe_context *gpe_context;
5664     struct gpe_media_object_walker_parameter media_object_walker_param;
5665     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5666     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5667     int media_function = 0;
5668     int kernel_idx = 0;
5669     struct me_param param ;
5670     unsigned int scale_factor = 0;
5671
5672     switch (hme_type) {
5673     case INTEL_ENC_HME_4x : {
5674         media_function = INTEL_MEDIA_STATE_4X_ME;
5675         scale_factor = 4;
5676         break;
5677     }
5678     case INTEL_ENC_HME_16x : {
5679         media_function = INTEL_MEDIA_STATE_16X_ME;
5680         scale_factor = 16;
5681         break;
5682     }
5683     case INTEL_ENC_HME_32x : {
5684         media_function = INTEL_MEDIA_STATE_32X_ME;
5685         scale_factor = 32;
5686         break;
5687     }
5688     default:
5689         assert(0);
5690
5691     }
5692
5693     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5694     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5695
5696     /* I frame should not come here.*/
5697     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5698     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5699
5700     gpe->context_init(ctx, gpe_context);
5701     gpe->reset_binding_table(ctx, gpe_context);
5702
5703     /*set curbe*/
5704     memset(&param, 0, sizeof(param));
5705     param.hme_type = hme_type;
5706     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5707
5708     /*send surface*/
5709     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5710
5711     gpe->setup_interface_data(ctx, gpe_context);
5712
5713     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5714     /* the scaling is based on 8x8 blk level */
5715     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5716     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5717     kernel_walker_param.no_dependency = 1;
5718
5719     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5720
5721     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5722                                             gpe_context,
5723                                             media_function,
5724                                             &media_object_walker_param);
5725
5726     return VA_STATUS_SUCCESS;
5727 }
5728
5729 /*
5730 wp related function
5731 */
5732 static void
5733 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5734                       struct encode_state *encode_state,
5735                       struct i965_gpe_context *gpe_context,
5736                       struct intel_encoder_context *encoder_context,
5737                       void * param)
5738 {
5739     gen9_avc_wp_curbe_data *cmd;
5740     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5741     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5742     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5743     struct wp_param * curbe_param = (struct wp_param *)param;
5744
5745     cmd = i965_gpe_context_map_curbe(gpe_context);
5746
5747     if (!cmd)
5748         return;
5749     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5750     if (curbe_param->ref_list_idx) {
5751         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5752         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5753     } else {
5754         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5755         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5756     }
5757
5758     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5759     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5760
5761     i965_gpe_context_unmap_curbe(gpe_context);
5762
5763 }
5764
5765 static void
5766 gen9_avc_send_surface_wp(VADriverContextP ctx,
5767                          struct encode_state *encode_state,
5768                          struct i965_gpe_context *gpe_context,
5769                          struct intel_encoder_context *encoder_context,
5770                          void * param)
5771 {
5772     struct i965_driver_data *i965 = i965_driver_data(ctx);
5773     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5774     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5775     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5776     struct wp_param * curbe_param = (struct wp_param *)param;
5777     struct object_surface *obj_surface;
5778     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5779     VASurfaceID surface_id;
5780
5781     if (curbe_param->ref_list_idx) {
5782         surface_id = slice_param->RefPicList1[0].picture_id;
5783         obj_surface = SURFACE(surface_id);
5784         if (!obj_surface || !obj_surface->private_data)
5785             avc_state->weighted_ref_l1_enable = 0;
5786         else
5787             avc_state->weighted_ref_l1_enable = 1;
5788     } else {
5789         surface_id = slice_param->RefPicList0[0].picture_id;
5790         obj_surface = SURFACE(surface_id);
5791         if (!obj_surface || !obj_surface->private_data)
5792             avc_state->weighted_ref_l0_enable = 0;
5793         else
5794             avc_state->weighted_ref_l0_enable = 1;
5795     }
5796     if (!obj_surface)
5797         obj_surface = encode_state->reference_objects[0];
5798
5799
5800     i965_add_adv_gpe_surface(ctx, gpe_context,
5801                              obj_surface,
5802                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5803
5804     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5805     i965_add_adv_gpe_surface(ctx, gpe_context,
5806                              obj_surface,
5807                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5808 }
5809
5810
5811 static VAStatus
5812 gen9_avc_kernel_wp(VADriverContextP ctx,
5813                    struct encode_state *encode_state,
5814                    struct intel_encoder_context *encoder_context,
5815                    unsigned int list1_in_use)
5816 {
5817     struct i965_driver_data *i965 = i965_driver_data(ctx);
5818     struct i965_gpe_table *gpe = &i965->gpe_table;
5819     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5820     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5821     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5822     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5823
5824     struct i965_gpe_context *gpe_context;
5825     struct gpe_media_object_walker_parameter media_object_walker_param;
5826     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5827     int media_function = INTEL_MEDIA_STATE_ENC_WP;
5828     struct wp_param param;
5829
5830     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5831
5832     gpe->context_init(ctx, gpe_context);
5833     gpe->reset_binding_table(ctx, gpe_context);
5834
5835     memset(&param, 0, sizeof(param));
5836     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5837     /*set curbe*/
5838     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5839
5840     /*send surface*/
5841     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5842
5843     gpe->setup_interface_data(ctx, gpe_context);
5844
5845     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5846     /* the scaling is based on 8x8 blk level */
5847     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5848     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5849     kernel_walker_param.no_dependency = 1;
5850
5851     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5852
5853     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5854                                             gpe_context,
5855                                             media_function,
5856                                             &media_object_walker_param);
5857
5858     return VA_STATUS_SUCCESS;
5859 }
5860
5861
5862 /*
5863 sfd related function
5864 */
5865 static void
5866 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5867                        struct encode_state *encode_state,
5868                        struct i965_gpe_context *gpe_context,
5869                        struct intel_encoder_context *encoder_context,
5870                        void * param)
5871 {
5872     gen9_avc_sfd_curbe_data *cmd;
5873     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5874     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5875     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5876     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5877
5878     cmd = i965_gpe_context_map_curbe(gpe_context);
5879
5880     if (!cmd)
5881         return;
5882     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5883
5884     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5885     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5886     cmd->dw0.stream_in_type = 7 ;
5887     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
5888     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5889     cmd->dw0.vdenc_mode_disable = 1 ;
5890
5891     cmd->dw1.hme_stream_in_ref_cost = 5 ;
5892     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5893     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5894
5895     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5896     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
5897
5898     cmd->dw3.large_mv_threshold = 128 ;
5899     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5900     cmd->dw5.zmv_threshold = 4 ;
5901     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5902     cmd->dw7.min_dist_threshold = 10 ;
5903
5904     if (generic_state->frame_type == SLICE_TYPE_P) {
5905         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5906
5907     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5908         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5909     }
5910
5911     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5912     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
5913     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5914     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5915     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5916     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5917     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5918
5919     i965_gpe_context_unmap_curbe(gpe_context);
5920
5921 }
5922
5923 static void
5924 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5925                           struct encode_state *encode_state,
5926                           struct i965_gpe_context *gpe_context,
5927                           struct intel_encoder_context *encoder_context,
5928                           void * param)
5929 {
5930     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5931     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5932     struct i965_gpe_resource *gpe_resource;
5933     int size = 0;
5934
5935     /*HME mv data surface memv output 4x*/
5936     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5937     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5938                                    gpe_resource,
5939                                    1,
5940                                    I965_SURFACEFORMAT_R8_UNORM,
5941                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5942
5943     /* memv distortion */
5944     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5945     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5946                                    gpe_resource,
5947                                    1,
5948                                    I965_SURFACEFORMAT_R8_UNORM,
5949                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5950     /*buffer output*/
5951     size = 32 * 4 * 4;
5952     gpe_resource = &avc_ctx->res_sfd_output_buffer;
5953     i965_add_buffer_gpe_surface(ctx,
5954                                 gpe_context,
5955                                 gpe_resource,
5956                                 0,
5957                                 size / 4,
5958                                 0,
5959                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5960
5961 }
5962
5963 static VAStatus
5964 gen9_avc_kernel_sfd(VADriverContextP ctx,
5965                     struct encode_state *encode_state,
5966                     struct intel_encoder_context *encoder_context)
5967 {
5968     struct i965_driver_data *i965 = i965_driver_data(ctx);
5969     struct i965_gpe_table *gpe = &i965->gpe_table;
5970     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5971     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5972     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5973
5974     struct i965_gpe_context *gpe_context;
5975     struct gpe_media_object_parameter media_object_param;
5976     struct gpe_media_object_inline_data media_object_inline_data;
5977     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5978     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5979
5980     gpe->context_init(ctx, gpe_context);
5981     gpe->reset_binding_table(ctx, gpe_context);
5982
5983     /*set curbe*/
5984     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5985
5986     /*send surface*/
5987     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5988
5989     gpe->setup_interface_data(ctx, gpe_context);
5990
5991     memset(&media_object_param, 0, sizeof(media_object_param));
5992     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5993     media_object_param.pinline_data = &media_object_inline_data;
5994     media_object_param.inline_size = sizeof(media_object_inline_data);
5995
5996     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5997                                      gpe_context,
5998                                      media_function,
5999                                      &media_object_param);
6000
6001     return VA_STATUS_SUCCESS;
6002 }
6003
6004 /**************** PreEnc Scaling *************************************/
6005 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6006  * function to set preenc scaling curbe is the same one using for avc encode
6007         == gen95_avc_set_curbe_scaling4x()
6008  * function to send buffer/surface resources is the same one using for avc encode
6009         == gen9_avc_send_surface_scaling()
6010  */
6011 static VAStatus
6012 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6013                                struct encode_state *encode_state,
6014                                struct intel_encoder_context *encoder_context,
6015                                int hme_type,
6016                                int scale_surface_type)
6017 {
6018     struct i965_driver_data *i965 = i965_driver_data(ctx);
6019     struct i965_gpe_table *gpe = &i965->gpe_table;
6020     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6021     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6022     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6023     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6024     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6025     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6026     VAStatsStatisticsParameter *stat_param = NULL;
6027     struct i965_gpe_context *gpe_context;
6028     struct scaling_param surface_param;
6029     struct object_surface *obj_surface = NULL;
6030     struct gpe_media_object_walker_parameter media_object_walker_param;
6031     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6032     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6033     int media_function = 0;
6034     int kernel_idx = 0;
6035     int enable_statistics_output;
6036
6037     stat_param_h264 = avc_state->stat_param;
6038     assert(stat_param_h264);
6039     stat_param = &stat_param_h264->stats_params;
6040     enable_statistics_output = !stat_param_h264->disable_statistics_output;
6041
6042     memset(&surface_param, 0, sizeof(struct scaling_param));
6043     media_function = INTEL_MEDIA_STATE_4X_SCALING;
6044     kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6045     downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6046     downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6047
6048     surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6049     surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6050     surface_param.output_frame_width = generic_state->frame_width_4x;
6051     surface_param.output_frame_height = generic_state->frame_height_4x;
6052     surface_param.use_4x_scaling  = 1 ;
6053     surface_param.use_16x_scaling = 0 ;
6054     surface_param.use_32x_scaling = 0 ;
6055     surface_param.enable_mb_flatness_check = enable_statistics_output;
6056     surface_param.enable_mb_variance_output = enable_statistics_output;
6057     surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6058     surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
6059
6060     switch (scale_surface_type) {
6061
6062     case  SCALE_CUR_PIC:
6063         surface_param.input_surface = encode_state->input_yuv_object ;
6064         surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6065
6066         if (enable_statistics_output) {
6067             surface_param.pres_mbv_proc_stat_buffer =
6068                 &avc_ctx->preproc_stat_data_out_buffer;
6069             surface_param.mbv_proc_stat_enabled = 1;
6070         } else {
6071             surface_param.mbv_proc_stat_enabled = 0;
6072             surface_param.pres_mbv_proc_stat_buffer = NULL;
6073         }
6074         break;
6075
6076     case SCALE_PAST_REF_PIC:
6077         obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6078         assert(obj_surface);
6079         surface_param.input_surface = obj_surface;
6080         surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6081
6082         if (stat_param->past_ref_stat_buf) {
6083             surface_param.pres_mbv_proc_stat_buffer =
6084                 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6085             surface_param.mbv_proc_stat_enabled = 1;
6086         } else {
6087             surface_param.mbv_proc_stat_enabled = 0;
6088             surface_param.pres_mbv_proc_stat_buffer = NULL;
6089         }
6090         break;
6091
6092     case SCALE_FUTURE_REF_PIC:
6093
6094         obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6095         assert(obj_surface);
6096         surface_param.input_surface = obj_surface;
6097         surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6098
6099         if (stat_param->future_ref_stat_buf) {
6100             surface_param.pres_mbv_proc_stat_buffer =
6101                 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6102             surface_param.mbv_proc_stat_enabled = 1;
6103         } else {
6104             surface_param.mbv_proc_stat_enabled = 0;
6105             surface_param.pres_mbv_proc_stat_buffer = NULL;
6106         }
6107         break;
6108     default :
6109         assert(0);
6110     }
6111
6112     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6113
6114     gpe->context_init(ctx, gpe_context);
6115     gpe->reset_binding_table(ctx, gpe_context);
6116
6117     generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6118
6119     surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6120     surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6121
6122     /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6123      * VAStatsStatisticsH264 will be used to store the output.  */
6124     surface_param.enable_mb_flatness_check = 0;
6125     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6126
6127     /* setup the interface data */
6128     gpe->setup_interface_data(ctx, gpe_context);
6129
6130     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6131     /* the scaling is based on 8x8 blk level */
6132     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6133     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6134     kernel_walker_param.no_dependency = 1;
6135
6136     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6137
6138     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6139                                             gpe_context,
6140                                             media_function,
6141                                             &media_object_walker_param);
6142
6143     return VA_STATUS_SUCCESS;
6144 }
6145
6146 /**************** PreEnc HME *************************************/
/* The kernel used to run PreEnc HME is the same one used in AVC encode:
 *         == gen9_avc_kernel_me()
 * function to set the preenc hme curbe: gen9_avc_preenc_set_curbe_me()
 * function to send hme buffers/surfaces: gen9_avc_preenc_send_surface_me()
 */
/* Fill the CURBE (constant URB entry) for the PreEnc HME kernel.
 *
 * PreEnc reuses the AVC-encode ME kernel but only ever runs the 4x
 * downscaled pass (see the switch below: anything other than
 * INTEL_ENC_HME_4x asserts).  Search controls (sub-pel mode, frame QP,
 * reference counts) come from the application-supplied
 * VAStatsStatisticsParameterH264 instead of the encode picture/slice
 * parameters.  @param is expected to be a struct me_param describing
 * the HME level. */
static void
gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct i965_gpe_context *gpe_context,
                             struct intel_encoder_context *encoder_context,
                             void * param)
{
    gen9_avc_fei_me_curbe_data *curbe_cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;

    struct me_param * curbe_param = (struct me_param *)param ;
    unsigned char  use_mv_from_prev_step = 0;
    unsigned char write_distortions = 0;
    unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
    unsigned char seach_table_idx = 0;    /* search-path table index: 0 = P, 1 = B */
    unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    unsigned int scale_factor = 0;

    /* PreEnc HME only supports the 4x level: it is the first (and only)
     * pass, so there is no coarser-level MV input to refine. */
    switch (curbe_param->hme_type) {
    case INTEL_ENC_HME_4x:
        use_mv_from_prev_step = 0;
        write_distortions = 0;
        mv_shift_factor = 2;
        scale_factor = 4;
        prev_mv_read_pos_factor = 0;
        break;

    default:
        assert(0);
    }

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
    if (!curbe_cmd)
        return;

    /* Frame dimensions in MBs at the 4x downscaled resolution. */
    downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
    downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;

    /* Start from the common ME CURBE template, then patch fields below. */
    memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));

    curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
    if (avc_state->field_scaling_output_interleaved) {
        /*frame set to zero,field specified*/
        curbe_cmd->dw3.src_access = 0;
        curbe_cmd->dw3.ref_access = 0;
        curbe_cmd->dw7.src_field_polarity = 0;
    }
    curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
    curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
    curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;

    curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
    curbe_cmd->dw6.write_distortions = write_distortions;
    curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
    curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only

    /* B frames switch to the B search method/table and equal bi-weight. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        curbe_cmd->dw1.bi_weight = 32;
        curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
        me_method = gen9_avc_b_me_method[generic_state->preset];
        seach_table_idx = 1;
    }

    if (generic_state->frame_type == SLICE_TYPE_P ||
        generic_state->frame_type == SLICE_TYPE_B)
        curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;

    curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
    curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;

    /* Copy the IME search path (14 DWs) selected by slice type + preset. */
    memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));

    /* Binding-table indices consumed by the kernel. */
    curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
    curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
    curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
    curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
    curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
    curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
    curbe_cmd->dw38.reserved = 0;

    i965_gpe_context_unmap_curbe(gpe_context);
    return;
}
6240
6241 static void
6242 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6243                                 struct encode_state *encode_state,
6244                                 struct i965_gpe_context *gpe_context,
6245                                 struct intel_encoder_context *encoder_context,
6246                                 void * param)
6247 {
6248     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6249     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6250     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6251     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6252     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6253     struct object_surface *input_surface;
6254     struct i965_gpe_resource *gpe_resource;
6255     struct me_param * curbe_param = (struct me_param *)param ;
6256     int i = 0;
6257
6258     /* PreEnc Only supports 4xme */
6259     assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6260
6261     switch (curbe_param->hme_type) {
6262     case INTEL_ENC_HME_4x : {
6263         /*memv output 4x*/
6264         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6265         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6266                                        gpe_resource,
6267                                        1,
6268                                        I965_SURFACEFORMAT_R8_UNORM,
6269                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6270
6271         /* memv distortion output*/
6272         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6273         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6274                                        gpe_resource,
6275                                        1,
6276                                        I965_SURFACEFORMAT_R8_UNORM,
6277                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6278
6279         /* brc distortion  output*/
6280         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6281         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6282                                        gpe_resource,
6283                                        1,
6284                                        I965_SURFACEFORMAT_R8_UNORM,
6285                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6286
6287         /* input past ref scaled YUV surface*/
6288         for (i = 0; i < stat_param->num_past_references; i++) {
6289             /*input current down scaled YUV surface for forward refef */
6290             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6291             i965_add_adv_gpe_surface(ctx, gpe_context,
6292                                      input_surface,
6293                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6294
6295             input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6296             i965_add_adv_gpe_surface(ctx, gpe_context,
6297                                      input_surface,
6298                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6299         }
6300
6301         /* input future ref scaled YUV surface*/
6302         for (i = 0; i < stat_param->num_future_references; i++) {
6303             /*input current down scaled YUV surface for backward ref */
6304             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6305             i965_add_adv_gpe_surface(ctx, gpe_context,
6306                                      input_surface,
6307                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6308
6309             input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6310             i965_add_adv_gpe_surface(ctx, gpe_context,
6311                                      input_surface,
6312                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6313         }
6314         break;
6315
6316     }
6317     default:
6318         break;
6319
6320     }
6321 }
6322
6323 /**************** PreEnc PreProc *************************************/
6324 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6325  * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6326  * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
6327  */
6328 static void
6329 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6330                                   struct encode_state *encode_state,
6331                                   struct i965_gpe_context *gpe_context,
6332                                   struct intel_encoder_context *encoder_context,
6333                                   void * param)
6334 {
6335     gen9_avc_preproc_curbe_data *cmd;
6336     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6337     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6338     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6339     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6340     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6341     unsigned char me_method = 0;
6342     unsigned int table_idx = 0;
6343     int ref_width, ref_height, len_sp;
6344     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6345     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6346     unsigned int preset = generic_state->preset;
6347
6348     cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6349     if (!cmd)
6350         return;
6351     memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6352
6353     switch (generic_state->frame_type) {
6354     case SLICE_TYPE_I:
6355         memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6356                sizeof(gen9_avc_preproc_curbe_data));
6357         break;
6358     case SLICE_TYPE_P:
6359         memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6360                sizeof(gen9_avc_preproc_curbe_data));
6361         break;
6362     case SLICE_TYPE_B:
6363         memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6364                sizeof(gen9_avc_preproc_curbe_data));
6365         break;
6366     default:
6367         assert(0);
6368     }
6369     /* 4 means full search, 6 means diamand search */
6370     me_method  = (stat_param_h264->search_window == 5) ||
6371                  (stat_param_h264->search_window == 8) ? 4 : 6;
6372
6373     ref_width    = stat_param_h264->ref_width;
6374     ref_height   = stat_param_h264->ref_height;
6375     len_sp       = stat_param_h264->len_sp;
6376     /* If there is a serch_window, discard user provided ref_width, ref_height
6377      * and search_path length */
6378     switch (stat_param_h264->search_window) {
6379     case 0:
6380         /*  not use predefined search window, there should be a search_path input */
6381         if ((stat_param_h264->search_path != 0) &&
6382             (stat_param_h264->search_path != 1) &&
6383             (stat_param_h264->search_path != 2)) {
6384             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
6385             assert(0);
6386         }
6387         /* 4 means full search, 6 means diamand search */
6388         me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6389         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6390             WARN_ONCE("Invalid input ref_width/ref_height in"
6391                       "SearchWindow=0 case! \n");
6392             assert(0);
6393         }
6394         break;
6395
6396     case 1:
6397         /* Tiny - 4 SUs 24x24 window */
6398         ref_width  = 24;
6399         ref_height = 24;
6400         len_sp     = 4;
6401         break;
6402
6403     case 2:
6404         /* Small - 9 SUs 28x28 window */
6405         ref_width  = 28;
6406         ref_height = 28;
6407         len_sp     = 9;
6408         break;
6409     case 3:
6410         /* Diamond - 16 SUs 48x40 window */
6411         ref_width  = 48;
6412         ref_height = 40;
6413         len_sp     = 16;
6414         break;
6415     case 4:
6416         /* Large Diamond - 32 SUs 48x40 window */
6417         ref_width  = 48;
6418         ref_height = 40;
6419         len_sp     = 32;
6420         break;
6421     case 5:
6422         /* Exhaustive - 48 SUs 48x40 window */
6423         ref_width  = 48;
6424         ref_height = 40;
6425         len_sp     = 48;
6426         break;
6427     case 6:
6428         /* Diamond - 16 SUs 64x32 window */
6429         ref_width  = 64;
6430         ref_height = 32;
6431         len_sp     = 16;
6432         break;
6433     case 7:
6434         /* Large Diamond - 32 SUs 64x32 window */
6435         ref_width  = 64;
6436         ref_height = 32;
6437         len_sp     = 32;
6438         break;
6439     case 8:
6440         /* Exhaustive - 48 SUs 64x32 window */
6441         ref_width  = 64;
6442         ref_height = 32;
6443         len_sp     = 48;
6444         break;
6445
6446     default:
6447         assert(0);
6448     }
6449
6450     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6451     if (is_bframe) {
6452         CLIP(ref_width, 4, 32);
6453         CLIP(ref_height, 4, 32);
6454     } else if (is_pframe) {
6455         CLIP(ref_width, 4, 64);
6456         CLIP(ref_height, 4, 32);
6457     }
6458
6459     cmd->dw0.adaptive_enable =
6460         cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6461     cmd->dw2.max_len_sp = len_sp;
6462     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6463     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6464     cmd->dw3.src_access =
6465         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6466
6467     if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6468         cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6469     else
6470         cmd->dw3.ft_enable = 0;
6471
6472     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6473     cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6474     cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6475     cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6476     cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6477     cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6478     cmd->dw4.hme_enable = generic_state->hme_enabled;
6479     cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6480     cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6481
6482     cmd->dw4.multiple_mv_predictor_per_mb_enable =
6483         (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6484
6485     cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6486     cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6487
6488     cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6489     cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6490
6491     cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6492
6493     /* mv mode cost */
6494     memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6495
6496     /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6497     memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6498
6499     /* search path tables */
6500     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6501     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6502
6503     if (stat_param_h264->intra_part_mask  == 0x07)
6504         cmd->dw31.intra_compute_type  = 3;
6505
6506     cmd->dw38.ref_threshold = 400;
6507     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6508
6509     if (generic_state->frame_type == SLICE_TYPE_I) {
6510         cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6511         cmd->dw36.hme_combine_overlap = 0;
6512     } else if (generic_state->frame_type == SLICE_TYPE_P) {
6513         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6514         cmd->dw3.bme_disable_fbr = 1;
6515         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6516         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6517         cmd->dw7.non_skip_zmv_added = 1;
6518         cmd->dw7.non_skip_mode_added = 1;
6519         cmd->dw7.skip_center_mask = 1;
6520         cmd->dw32.max_vmv_r =
6521             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6522         cmd->dw36.hme_combine_overlap = 1;
6523
6524     } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6525
6526         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6527         cmd->dw3.search_ctrl = 0;
6528         cmd->dw3.skip_type = 1;
6529         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6530         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6531         cmd->dw7.skip_center_mask = 0xff;
6532         cmd->dw32.max_vmv_r =
6533             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6534         cmd->dw36.hme_combine_overlap = 1;
6535     }
6536
6537     cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6538     cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6539     cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6540     cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6541     cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6542     cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6543     cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6544     cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6545     cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6546
6547     i965_gpe_context_unmap_curbe(gpe_context);
6548 }
6549
/* Bind all input/output surfaces for the PreEnc PreProc kernel.
 *
 * The binding-table indices used here must match the surf_index values
 * programmed in gen9_avc_preenc_set_curbe_preproc().  Optional buffers
 * (HME MV data, MV predictors, per-MB QP, MV/statistics outputs) are
 * only bound when the corresponding feature is enabled in
 * VAStatsStatisticsParameterH264. */
static void
gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct i965_gpe_context *gpe_context,
                                     struct intel_encoder_context *encoder_context,
                                     void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx); /* needed by the SURFACE() lookup macro */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct i965_gpe_resource *gpe_resource;
    VASurfaceID surface_id;
    VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
    unsigned int size = 0, frame_mb_nums = 0;

    frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;

    /* input yuv surface, Y index */
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_CURR_Y_INDEX);

    /* input yuv surface, UV index */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);


    if (generic_state->hme_enabled) {
        /* HME mv data buffer (4x MV output produced by the HME pass) */
        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
    }

    /* mv predictor buffer (application-supplied MV predictors) */
    if (stat_param_h264->mv_predictor_ctrl) {
        size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
        gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
    }

    /* MB qp buffer (per-MB QP input), plus the FTQ look-up table */
    if (stat_param_h264->mb_qp) {
        size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
        gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MBQP_INDEX);

        /* FTQ LUT: 16 DWs per QP value (filled by the kernel-run path) */
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        size = 16 * AVC_QP_MAX * 4;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_FTQ_LUT_INDEX);

    }

    /* mv data output buffer */
    if (!stat_param_h264->disable_mv_output) {
        gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
        size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MV_DATA_INDEX);
    }

    /* statistics output buffer */
    if (!stat_param_h264->disable_statistics_output) {
        gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MB_STATS_INDEX);
    }

    /* vme cur pic y */
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);

    /* vme cur pic y (repeating based on required BTI order for the media kernel) */
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);

    /* vme forward ref */
    /* Only supports one past ref */
    if (stat_param->num_past_references > 0) {
        surface_id = stat_param->past_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);

    }

    /* vme future ref */
    /* Only supports one future ref; it is bound twice (IDX0_0 and IDX0_1)
     * because the kernel's binding-table layout expects both slots. */
    if (stat_param->num_future_references > 0) {
        surface_id = stat_param->future_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);

        surface_id = stat_param->future_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
    }

    return;

}
6723
6724 static VAStatus
6725 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6726                                struct encode_state *encode_state,
6727                                struct intel_encoder_context *encoder_context)
6728 {
6729     struct i965_driver_data *i965 = i965_driver_data(ctx);
6730     struct i965_gpe_table *gpe = &i965->gpe_table;
6731     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6732     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6733     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6734     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6735     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6736     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6737     struct i965_gpe_context *gpe_context;
6738     struct gpe_media_object_walker_parameter media_object_walker_param;
6739     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6740     int media_function = INTEL_MEDIA_STATE_PREPROC;
6741     struct i965_gpe_resource *gpe_resource = NULL;
6742     unsigned int * data = NULL;
6743     unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
6744
6745     gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6746     gpe->context_init(ctx, gpe_context);
6747     gpe->reset_binding_table(ctx, gpe_context);
6748
6749     /*set curbe*/
6750     generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6751
6752     /*send surface*/
6753     generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6754
6755     gpe->setup_interface_data(ctx, gpe_context);
6756
6757     /*  Set up FtqLut Buffer if there is QP change within a frame */
6758     if (stat_param_h264->mb_qp) {
6759         gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6760         assert(gpe_resource);
6761         data = i965_map_gpe_resource(gpe_resource);
6762         assert(data);
6763         memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
6764     }
6765
6766     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6767     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6768     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6769     kernel_walker_param.no_dependency = 1;
6770
6771     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6772
6773     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6774                                             gpe_context,
6775                                             media_function,
6776                                             &media_object_walker_param);
6777
6778     return VA_STATUS_SUCCESS;
6779 }
6780
6781
6782 static void
6783 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
6784                          struct encode_state *encode_state,
6785                          struct i965_gpe_context *gpe_context,
6786                          struct intel_encoder_context *encoder_context,
6787                          void * param)
6788 {
6789     struct i965_driver_data *i965 = i965_driver_data(ctx);
6790     gen8_avc_mbenc_curbe_data *cmd;
6791     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6792     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6793     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6794
6795     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6796     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
6797     VASurfaceID surface_id;
6798     struct object_surface *obj_surface;
6799
6800     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
6801     unsigned char qp = 0;
6802     unsigned char me_method = 0;
6803     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
6804     unsigned int table_idx = 0;
6805     unsigned int curbe_size = 0;
6806
6807     unsigned int preset = generic_state->preset;
6808     if (IS_GEN8(i965->intel.device_info)) {
6809         cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6810         if (!cmd)
6811             return;
6812         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6813         memset(cmd, 0, curbe_size);
6814
6815         if (mbenc_i_frame_dist_in_use) {
6816             memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6817         } else {
6818             switch (generic_state->frame_type) {
6819             case SLICE_TYPE_I:
6820                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6821                 break;
6822             case SLICE_TYPE_P:
6823                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6824                 break;
6825             case SLICE_TYPE_B:
6826                 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
6827                 break;
6828             default:
6829                 assert(0);
6830             }
6831         }
6832     } else {
6833         assert(0);
6834
6835         return;
6836     }
6837
6838     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
6839     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6840
6841     cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6842     cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6843     cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6844     cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6845
6846     cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6847     cmd->dw38.max_len_sp = 0;
6848
6849     cmd->dw3.src_access = 0;
6850     cmd->dw3.ref_access = 0;
6851
6852     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6853         //disable ftq_override by now.
6854         if (avc_state->ftq_override) {
6855             cmd->dw3.ftq_enable = avc_state->ftq_enable;
6856
6857         } else {
6858             if (generic_state->frame_type == SLICE_TYPE_P) {
6859                 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6860
6861             } else {
6862                 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6863             }
6864         }
6865     } else {
6866         cmd->dw3.ftq_enable = 0;
6867     }
6868
6869     if (avc_state->disable_sub_mb_partion)
6870         cmd->dw3.sub_mb_part_mask = 0x7;
6871
6872     if (mbenc_i_frame_dist_in_use) {
6873         cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6874         cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6875         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6876         cmd->dw6.batch_buffer_end = 0;
6877         cmd->dw31.intra_compute_type = 1;
6878     } else {
6879         cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6880         cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6881         cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
6882
6883         {
6884             memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
6885             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6886             } else if (avc_state->skip_bias_adjustment_enable) {
6887                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6888                 // No need to check for P picture as the flag is only enabled for P picture */
6889                 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
6890             }
6891         }
6892         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6893         memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6894     }
6895     cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6896     cmd->dw4.field_parity_flag = 0;//bottom field
6897     cmd->dw4.enable_cur_fld_idr = 0;//field realted
6898     cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6899     cmd->dw4.hme_enable = generic_state->hme_enabled;
6900     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6901     cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
6902
6903     cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6904     cmd->dw7.src_field_polarity = 0;//field related
6905
6906     /*ftq_skip_threshold_lut set,dw14 /15*/
6907
6908     /*r5 disable NonFTQSkipThresholdLUT*/
6909     if (generic_state->frame_type == SLICE_TYPE_P) {
6910         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6911     } else if (generic_state->frame_type == SLICE_TYPE_B) {
6912         cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6913     }
6914
6915     cmd->dw13.qp_prime_y = qp;
6916     cmd->dw13.qp_prime_cb = qp;
6917     cmd->dw13.qp_prime_cr = qp;
6918     cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
6919
6920     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6921         switch (gen9_avc_multi_pred[preset]) {
6922         case 0:
6923             cmd->dw32.mult_pred_l0_disable = 128;
6924             cmd->dw32.mult_pred_l1_disable = 128;
6925             break;
6926         case 1:
6927             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6928             cmd->dw32.mult_pred_l1_disable = 128;
6929             break;
6930         case 2:
6931             cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6932             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6933             break;
6934         case 3:
6935             cmd->dw32.mult_pred_l0_disable = 1;
6936             cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6937             break;
6938         }
6939
6940     } else {
6941         cmd->dw32.mult_pred_l0_disable = 128;
6942         cmd->dw32.mult_pred_l1_disable = 128;
6943     }
6944
6945     if (generic_state->frame_type == SLICE_TYPE_B) {
6946         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
6947         cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6948         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6949     }
6950
6951     cmd->dw34.b_original_bff = 0; //frame only
6952     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6953     cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6954     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6955     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6956     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6957     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6958
6959     if (cmd->dw34.force_non_skip_check) {
6960         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6961     }
6962
6963     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6964     cmd->dw38.ref_threshold = 400;
6965     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6966     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
6967
6968     if (mbenc_i_frame_dist_in_use) {
6969         cmd->dw13.qp_prime_y = 0;
6970         cmd->dw13.qp_prime_cb = 0;
6971         cmd->dw13.qp_prime_cr = 0;
6972         cmd->dw33.intra_16x16_nondc_penalty = 0;
6973         cmd->dw33.intra_8x8_nondc_penalty = 0;
6974         cmd->dw33.intra_4x4_nondc_penalty = 0;
6975     }
6976     if (cmd->dw4.use_actual_ref_qp_value) {
6977         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6978         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6979         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6980         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6981         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6982         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6983         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6984         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6985         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6986         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6987     }
6988
6989     table_idx = slice_type_kernel[generic_state->frame_type];
6990     cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
6991     if (generic_state->frame_type == SLICE_TYPE_I) {
6992         cmd->dw0.skip_mode_enable = 0;
6993         cmd->dw37.skip_mode_enable = 0;
6994         cmd->dw36.hme_combine_overlap = 0;
6995         cmd->dw47.intra_cost_sf = 16;
6996         cmd->dw34.enable_direct_bias_adjustment = 0;
6997         cmd->dw34.enable_global_motion_bias_adjustment = 0;
6998
6999     } else if (generic_state->frame_type == SLICE_TYPE_P) {
7000         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7001         cmd->dw3.bme_disable_fbr = 1;
7002         cmd->dw5.ref_width = gen9_avc_search_x[preset];
7003         cmd->dw5.ref_height = gen9_avc_search_y[preset];
7004         cmd->dw7.non_skip_zmv_added = 1;
7005         cmd->dw7.non_skip_mode_added = 1;
7006         cmd->dw7.skip_center_mask = 1;
7007         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7008         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
7009         cmd->dw36.hme_combine_overlap = 1;
7010         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7011         cmd->dw39.ref_width = gen9_avc_search_x[preset];
7012         cmd->dw39.ref_height = gen9_avc_search_y[preset];
7013         cmd->dw34.enable_direct_bias_adjustment = 0;
7014         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7015         if (avc_state->global_motion_bias_adjustment_enable)
7016             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
7017     } else {
7018         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7019         cmd->dw1.bi_weight = avc_state->bi_weight;
7020         cmd->dw3.search_ctrl = 7;
7021         cmd->dw3.skip_type = 1;
7022         cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
7023         cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
7024         cmd->dw7.skip_center_mask = 0xff;
7025         cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7026         cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7027         cmd->dw36.hme_combine_overlap = 1;
7028         surface_id = slice_param->RefPicList1[0].picture_id;
7029         obj_surface = SURFACE(surface_id);
7030         if (!obj_surface) {
7031             WARN_ONCE("Invalid backward reference frame\n");
7032             return;
7033         }
7034         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
7035         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7036         cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
7037         cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
7038         cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
7039         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
7040         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
7041         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
7042         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
7043         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
7044         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
7045         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
7046         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
7047         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
7048         if (cmd->dw34.enable_direct_bias_adjustment) {
7049             cmd->dw7.non_skip_zmv_added = 1;
7050             cmd->dw7.non_skip_mode_added = 1;
7051         }
7052
7053         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7054         if (avc_state->global_motion_bias_adjustment_enable)
7055             cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
7056     }
7057     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
7058
7059     if (avc_state->rolling_intra_refresh_enable) {
7060         /*by now disable it*/
7061         if (generic_state->brc_enabled) {
7062             cmd->dw4.enable_intra_refresh = false;
7063             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7064             cmd->dw48.widi_intra_refresh_mbx = 0;
7065             cmd->dw58.widi_intra_refresh_mby = 0;
7066         } else {
7067             cmd->dw4.enable_intra_refresh = true;
7068             cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7069         }
7070         cmd->dw32.mult_pred_l0_disable = 128;
7071         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
7072          across one P frame to another P frame, as needed by the RollingI algo */
7073         cmd->dw48.widi_intra_refresh_mbx = 0;
7074         cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
7075         cmd->dw48.widi_intra_refresh_qp_delta = 0;
7076
7077     } else {
7078         cmd->dw34.widi_intra_refresh_en = 0;
7079     }
7080
7081     /*roi set disable by now. 49-56*/
7082     if (curbe_param->roi_enabled) {
7083         cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
7084         cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
7085         cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
7086         cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
7087
7088         cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
7089         cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
7090         cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
7091         cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
7092
7093         cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
7094         cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
7095         cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
7096         cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
7097
7098         cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
7099         cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
7100         cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
7101         cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
7102
7103         cmd->dw36.enable_cabac_work_around = 0;
7104
7105         if (!generic_state->brc_enabled) {
7106             char tmp = 0;
7107             tmp = generic_state->roi[0].value;
7108             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7109             cmd->dw57.roi_1_dqp_prime_y = tmp;
7110             tmp = generic_state->roi[1].value;
7111             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7112             cmd->dw57.roi_2_dqp_prime_y = tmp;
7113             tmp = generic_state->roi[2].value;
7114             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7115             cmd->dw57.roi_3_dqp_prime_y = tmp;
7116             tmp = generic_state->roi[3].value;
7117             CLIP(tmp, -qp, AVC_QP_MAX - qp);
7118             cmd->dw57.roi_4_dqp_prime_y = tmp;
7119         } else {
7120             cmd->dw34.roi_enable_flag = 0;
7121         }
7122     }
7123
7124     cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
7125     cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
7126     cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
7127     cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
7128     cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
7129     cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
7130     cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
7131     cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
7132     cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
7133     cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
7134     cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
7135     cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
7136     cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
7137     cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
7138     cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
7139     cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
7140     cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
7141     cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
7142     cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
7143     cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
7144     cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
7145     cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
7146
7147     i965_gpe_context_unmap_curbe(gpe_context);
7148
7149     return;
7150 }
7151
7152 static void
7153 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7154                              struct encode_state *encode_state,
7155                              struct i965_gpe_context *gpe_context,
7156                              struct intel_encoder_context *encoder_context,
7157                              void *param)
7158 {
7159     gen8_avc_scaling4x_curbe_data *curbe_cmd;
7160     struct scaling_param *surface_param = (struct scaling_param *)param;
7161
7162     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7163
7164     if (!curbe_cmd)
7165         return;
7166
7167     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7168
7169     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
7170     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7171
7172     curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7173     curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7174
7175     curbe_cmd->dw5.flatness_threshold = 0;
7176     if (surface_param->enable_mb_flatness_check) {
7177         curbe_cmd->dw5.flatness_threshold = 128;
7178         curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7179     }
7180
7181     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7182     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7183     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7184
7185     if (curbe_cmd->dw6.enable_mb_variance_output ||
7186         curbe_cmd->dw6.enable_mb_pixel_average_output) {
7187         curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7188         curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7189     }
7190
7191     i965_gpe_context_unmap_curbe(gpe_context);
7192     return;
7193 }
7194
7195 static void
7196 gen8_avc_set_curbe_me(VADriverContextP ctx,
7197                       struct encode_state *encode_state,
7198                       struct i965_gpe_context *gpe_context,
7199                       struct intel_encoder_context *encoder_context,
7200                       void * param)
7201 {
7202     gen8_avc_me_curbe_data *curbe_cmd;
7203     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7204     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7205     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7206
7207     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7208
7209     struct me_param * curbe_param = (struct me_param *)param ;
7210     unsigned char  use_mv_from_prev_step = 0;
7211     unsigned char write_distortions = 0;
7212     unsigned char qp_prime_y = 0;
7213     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7214     unsigned char seach_table_idx = 0;
7215     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7216     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7217     unsigned int scale_factor = 0;
7218
7219     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
7220     switch (curbe_param->hme_type) {
7221     case INTEL_ENC_HME_4x : {
7222         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7223         write_distortions = 1;
7224         mv_shift_factor = 2;
7225         scale_factor = 4;
7226         prev_mv_read_pos_factor = 0;
7227         break;
7228     }
7229     case INTEL_ENC_HME_16x : {
7230         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7231         write_distortions = 0;
7232         mv_shift_factor = 2;
7233         scale_factor = 16;
7234         prev_mv_read_pos_factor = 1;
7235         break;
7236     }
7237     case INTEL_ENC_HME_32x : {
7238         use_mv_from_prev_step = 0;
7239         write_distortions = 0;
7240         mv_shift_factor = 1;
7241         scale_factor = 32;
7242         prev_mv_read_pos_factor = 0;
7243         break;
7244     }
7245     default:
7246         assert(0);
7247
7248     }
7249     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7250
7251     if (!curbe_cmd)
7252         return;
7253
7254     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7255     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7256
7257     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7258
7259     curbe_cmd->dw3.sub_pel_mode = 3;
7260     if (avc_state->field_scaling_output_interleaved) {
7261         /*frame set to zero,field specified*/
7262         curbe_cmd->dw3.src_access = 0;
7263         curbe_cmd->dw3.ref_access = 0;
7264         curbe_cmd->dw7.src_field_polarity = 0;
7265     }
7266     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7267     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7268     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7269
7270     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7271     curbe_cmd->dw6.write_distortions = write_distortions;
7272     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7273     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7274
7275     if (generic_state->frame_type == SLICE_TYPE_B) {
7276         curbe_cmd->dw1.bi_weight = 32;
7277         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7278         me_method = gen9_avc_b_me_method[generic_state->preset];
7279         seach_table_idx = 1;
7280     }
7281
7282     if (generic_state->frame_type == SLICE_TYPE_P ||
7283         generic_state->frame_type == SLICE_TYPE_B)
7284         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7285
7286     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7287     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
7288
7289     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
7290
7291     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7292     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7293     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7294     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7295     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7296     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7297     curbe_cmd->dw38.reserved = 0;
7298
7299     i965_gpe_context_unmap_curbe(gpe_context);
7300     return;
7301 }
7302
7303 static void
7304 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7305                                     struct encode_state *encode_state,
7306                                     struct i965_gpe_context *gpe_context,
7307                                     struct intel_encoder_context *encoder_context,
7308                                     void * param)
7309 {
7310     gen8_avc_frame_brc_update_curbe_data *cmd;
7311     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7312     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7313     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7314     struct object_surface *obj_surface;
7315     struct gen9_surface_avc *avc_priv_surface;
7316     struct avc_param common_param;
7317
7318     obj_surface = encode_state->reconstructed_object;
7319
7320     if (!obj_surface || !obj_surface->private_data)
7321         return;
7322     avc_priv_surface = obj_surface->private_data;
7323
7324     cmd = i965_gpe_context_map_curbe(gpe_context);
7325
7326     if (!cmd)
7327         return;
7328
7329     memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
7330
7331     cmd->dw5.target_size_flag = 0 ;
7332     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7333         /*overflow*/
7334         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7335         cmd->dw5.target_size_flag = 1 ;
7336     }
7337
7338     if (generic_state->skip_frame_enbale) {
7339         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7340         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
7341
7342         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7343
7344     }
7345     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7346     cmd->dw1.frame_number = generic_state->seq_frame_number ;
7347     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7348     cmd->dw5.cur_frame_type = generic_state->frame_type ;
7349     cmd->dw5.brc_flag = 0 ;
7350     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7351
7352     if (avc_state->multi_pre_enable) {
7353         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7354         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7355     }
7356
7357     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
7358     if (avc_state->min_max_qp_enable) {
7359         switch (generic_state->frame_type) {
7360         case SLICE_TYPE_I:
7361             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7362             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7363             break;
7364         case SLICE_TYPE_P:
7365             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7366             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7367             break;
7368         case SLICE_TYPE_B:
7369             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7370             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7371             break;
7372         }
7373     } else {
7374         cmd->dw6.minimum_qp = 0 ;
7375         cmd->dw6.maximum_qp = 0 ;
7376     }
7377
7378     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
7379
7380     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7381         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
7382         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
7383         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
7384         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
7385         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7386         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7387         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7388         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7389         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7390         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
7391
7392     }
7393
7394     memset(&common_param, 0, sizeof(common_param));
7395     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7396     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7397     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7398     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7399     common_param.frames_per_100s = generic_state->frames_per_100s;
7400     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7401     common_param.target_bit_rate = generic_state->target_bit_rate;
7402
7403     i965_gpe_context_unmap_curbe(gpe_context);
7404
7405     return;
7406 }
7407
7408 /*
7409 kernel related function:init/destroy etc
7410 */
7411 static void
7412 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7413                              struct generic_encoder_context *generic_context,
7414                              struct gen_avc_scaling_context *kernel_context,
7415                              int preenc_enabled)
7416 {
7417     struct i965_driver_data *i965 = i965_driver_data(ctx);
7418     struct i965_gpe_table *gpe = &i965->gpe_table;
7419     struct i965_gpe_context *gpe_context = NULL;
7420     struct encoder_kernel_parameter kernel_param ;
7421     struct encoder_scoreboard_parameter scoreboard_param;
7422     struct i965_kernel common_kernel;
7423
7424     memset(&kernel_param, 0, sizeof(kernel_param));
7425     if (IS_SKL(i965->intel.device_info) ||
7426         IS_BXT(i965->intel.device_info)) {
7427         if (!preenc_enabled) {
7428             kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7429             kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430         } else {
7431             /* Skylake PreEnc using GEN95/gen10 DS kernel */
7432             kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7433             kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434         }
7435     } else if (IS_KBL(i965->intel.device_info) ||
7436                IS_GEN10(i965->intel.device_info) ||
7437                IS_GLK(i965->intel.device_info)) {
7438         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7439         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440     } else if (IS_GEN8(i965->intel.device_info)) {
7441         kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7442         kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443     } else
7444         assert(0);
7445
7446     /* 4x scaling kernel*/
7447     kernel_param.sampler_size = 0;
7448
7449     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7450     scoreboard_param.mask = 0xFF;
7451     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7452     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7453     scoreboard_param.walkpat_flag = 0;
7454
7455     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7456     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7457     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7458
7459     memset(&common_kernel, 0, sizeof(common_kernel));
7460
7461     generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7462                                                 generic_context->enc_kernel_size,
7463                                                 INTEL_GENERIC_ENC_SCALING4X,
7464                                                 0,
7465                                                 &common_kernel);
7466
7467     gpe->load_kernels(ctx,
7468                       gpe_context,
7469                       &common_kernel,
7470                       1);
7471
7472     /* PreEnc using only the 4X scaling */
7473     if (preenc_enabled)
7474         return;
7475
7476     /*2x scaling kernel*/
7477     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7478     kernel_param.inline_data_size = 0;
7479     kernel_param.sampler_size = 0;
7480
7481     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7482     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7483     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7484
7485     memset(&common_kernel, 0, sizeof(common_kernel));
7486
7487     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7488                                          generic_context->enc_kernel_size,
7489                                          INTEL_GENERIC_ENC_SCALING2X,
7490                                          0,
7491                                          &common_kernel);
7492
7493     gpe->load_kernels(ctx,
7494                       gpe_context,
7495                       &common_kernel,
7496                       1);
7497
7498 }
7499
7500 static void
7501 gen9_avc_kernel_init_me(VADriverContextP ctx,
7502                         struct generic_encoder_context *generic_context,
7503                         struct gen_avc_me_context *kernel_context,
7504                         int preenc_enabled)
7505 {
7506     struct i965_driver_data *i965 = i965_driver_data(ctx);
7507     struct i965_gpe_table *gpe = &i965->gpe_table;
7508     struct i965_gpe_context *gpe_context = NULL;
7509     struct encoder_kernel_parameter kernel_param ;
7510     struct encoder_scoreboard_parameter scoreboard_param;
7511     struct i965_kernel common_kernel;
7512     int i = 0;
7513     unsigned int curbe_size = 0;
7514
7515     if (IS_GEN8(i965->intel.device_info)) {
7516         curbe_size = sizeof(gen8_avc_me_curbe_data);
7517     } else {
7518         if (!preenc_enabled)
7519             curbe_size = sizeof(gen9_avc_me_curbe_data);
7520         else
7521             curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7522     }
7523
7524     kernel_param.curbe_size = curbe_size;
7525     kernel_param.inline_data_size = 0;
7526     kernel_param.sampler_size = 0;
7527
7528     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7529     scoreboard_param.mask = 0xFF;
7530     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7531     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7532     scoreboard_param.walkpat_flag = 0;
7533
7534     /* There is two hme kernel, one for P and other for B frame */
7535     for (i = 0; i < 2; i++) {
7536         gpe_context = &kernel_context->gpe_contexts[i];
7537         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7538         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7539
7540         memset(&common_kernel, 0, sizeof(common_kernel));
7541
7542         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7543                                                     generic_context->enc_kernel_size,
7544                                                     INTEL_GENERIC_ENC_ME,
7545                                                     i,
7546                                                     &common_kernel);
7547
7548         gpe->load_kernels(ctx,
7549                           gpe_context,
7550                           &common_kernel,
7551                           1);
7552     }
7553
7554 }
7555
7556 static void
7557 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7558                              struct generic_encoder_context *generic_context,
7559                              struct gen_avc_preproc_context *kernel_context)
7560 {
7561     struct i965_driver_data *i965 = i965_driver_data(ctx);
7562     struct i965_gpe_table *gpe = &i965->gpe_table;
7563     struct i965_gpe_context *gpe_context = NULL;
7564     struct encoder_kernel_parameter kernel_param ;
7565     struct encoder_scoreboard_parameter scoreboard_param;
7566     struct i965_kernel common_kernel;
7567
7568     kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7569     kernel_param.inline_data_size = 0;
7570     kernel_param.sampler_size = 0;
7571
7572     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7573     scoreboard_param.mask = 0xFF;
7574     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7575     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7576     scoreboard_param.walkpat_flag = 0;
7577
7578     gpe_context = &kernel_context->gpe_contexts;
7579     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7580     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7581
7582     memset(&common_kernel, 0, sizeof(common_kernel));
7583
7584     intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7585                                              generic_context->enc_kernel_size,
7586                                              INTEL_GENERIC_ENC_PREPROC,
7587                                              0,
7588                                              &common_kernel);
7589
7590     gpe->load_kernels(ctx,
7591                       gpe_context,
7592                       &common_kernel,
7593                       1);
7594
7595 }
7596
7597 static void
7598 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7599                            struct generic_encoder_context *generic_context,
7600                            struct gen_avc_mbenc_context *kernel_context,
7601                            int fei_enabled)
7602 {
7603     struct i965_driver_data *i965 = i965_driver_data(ctx);
7604     struct i965_gpe_table *gpe = &i965->gpe_table;
7605     struct i965_gpe_context *gpe_context = NULL;
7606     struct encoder_kernel_parameter kernel_param ;
7607     struct encoder_scoreboard_parameter scoreboard_param;
7608     struct i965_kernel common_kernel;
7609     int i = 0;
7610     unsigned int curbe_size = 0;
7611     unsigned int num_mbenc_kernels = 0;
7612
7613     if (IS_SKL(i965->intel.device_info) ||
7614         IS_BXT(i965->intel.device_info)) {
7615         if (!fei_enabled) {
7616             curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7617             num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7618         } else {
7619             curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7620             num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7621         }
7622     } else if (IS_KBL(i965->intel.device_info) ||
7623                IS_GEN10(i965->intel.device_info) ||
7624                IS_GLK(i965->intel.device_info)) {
7625         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7626         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7627     } else if (IS_GEN8(i965->intel.device_info)) {
7628         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7629         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7630     }
7631
7632     assert(curbe_size > 0);
7633     kernel_param.curbe_size = curbe_size;
7634     kernel_param.inline_data_size = 0;
7635     kernel_param.sampler_size = 0;
7636
7637     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7638     scoreboard_param.mask = 0xFF;
7639     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7640     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7641     scoreboard_param.walkpat_flag = 0;
7642
7643     for (i = 0; i < num_mbenc_kernels ; i++) {
7644         gpe_context = &kernel_context->gpe_contexts[i];
7645         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7646         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7647
7648         memset(&common_kernel, 0, sizeof(common_kernel));
7649
7650         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7651                                                     generic_context->enc_kernel_size,
7652                                                     INTEL_GENERIC_ENC_MBENC,
7653                                                     i,
7654                                                     &common_kernel);
7655
7656         gpe->load_kernels(ctx,
7657                           gpe_context,
7658                           &common_kernel,
7659                           1);
7660     }
7661
7662 }
7663
7664 static void
7665 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7666                          struct generic_encoder_context *generic_context,
7667                          struct gen_avc_brc_context *kernel_context)
7668 {
7669     struct i965_driver_data *i965 = i965_driver_data(ctx);
7670     struct i965_gpe_table *gpe = &i965->gpe_table;
7671     struct i965_gpe_context *gpe_context = NULL;
7672     struct encoder_kernel_parameter kernel_param ;
7673     struct encoder_scoreboard_parameter scoreboard_param;
7674     struct i965_kernel common_kernel;
7675     int num_brc_init_kernels = 0;
7676     int i = 0;
7677
7678     if (IS_GEN8(i965->intel.device_info)) {
7679         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7680     } else {
7681         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
7682     }
7683
7684     const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7685         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7686         (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7687         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7688         (sizeof(gen8_avc_mbenc_curbe_data)),
7689         0,
7690     };
7691     const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7692         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7693         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7694         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7695         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7696         0,
7697         (sizeof(gen9_avc_mb_brc_curbe_data))
7698     };
7699
7700     kernel_param.inline_data_size = 0;
7701     kernel_param.sampler_size = 0;
7702
7703     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7704     scoreboard_param.mask = 0xFF;
7705     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7706     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7707     scoreboard_param.walkpat_flag = 0;
7708
7709     for (i = 0; i < num_brc_init_kernels; i++) {
7710         if (IS_GEN8(i965->intel.device_info)) {
7711             kernel_param.curbe_size = gen8_brc_curbe_size[i];
7712         } else {
7713             kernel_param.curbe_size = gen9_brc_curbe_size[i];
7714         }
7715         gpe_context = &kernel_context->gpe_contexts[i];
7716         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7717         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7718
7719         memset(&common_kernel, 0, sizeof(common_kernel));
7720
7721         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7722                                              generic_context->enc_kernel_size,
7723                                              INTEL_GENERIC_ENC_BRC,
7724                                              i,
7725                                              &common_kernel);
7726
7727         gpe->load_kernels(ctx,
7728                           gpe_context,
7729                           &common_kernel,
7730                           1);
7731     }
7732
7733 }
7734
7735 static void
7736 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7737                         struct generic_encoder_context *generic_context,
7738                         struct gen_avc_wp_context *kernel_context)
7739 {
7740     struct i965_driver_data *i965 = i965_driver_data(ctx);
7741     struct i965_gpe_table *gpe = &i965->gpe_table;
7742     struct i965_gpe_context *gpe_context = NULL;
7743     struct encoder_kernel_parameter kernel_param ;
7744     struct encoder_scoreboard_parameter scoreboard_param;
7745     struct i965_kernel common_kernel;
7746
7747     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7748     kernel_param.inline_data_size = 0;
7749     kernel_param.sampler_size = 0;
7750
7751     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7752     scoreboard_param.mask = 0xFF;
7753     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7754     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7755     scoreboard_param.walkpat_flag = 0;
7756
7757     gpe_context = &kernel_context->gpe_contexts;
7758     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7759     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7760
7761     memset(&common_kernel, 0, sizeof(common_kernel));
7762
7763     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7764                                          generic_context->enc_kernel_size,
7765                                          INTEL_GENERIC_ENC_WP,
7766                                          0,
7767                                          &common_kernel);
7768
7769     gpe->load_kernels(ctx,
7770                       gpe_context,
7771                       &common_kernel,
7772                       1);
7773
7774 }
7775
7776 static void
7777 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7778                          struct generic_encoder_context *generic_context,
7779                          struct gen_avc_sfd_context *kernel_context)
7780 {
7781     struct i965_driver_data *i965 = i965_driver_data(ctx);
7782     struct i965_gpe_table *gpe = &i965->gpe_table;
7783     struct i965_gpe_context *gpe_context = NULL;
7784     struct encoder_kernel_parameter kernel_param ;
7785     struct encoder_scoreboard_parameter scoreboard_param;
7786     struct i965_kernel common_kernel;
7787
7788     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7789     kernel_param.inline_data_size = 0;
7790     kernel_param.sampler_size = 0;
7791
7792     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7793     scoreboard_param.mask = 0xFF;
7794     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7795     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7796     scoreboard_param.walkpat_flag = 0;
7797
7798     gpe_context = &kernel_context->gpe_contexts;
7799     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7800     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7801
7802     memset(&common_kernel, 0, sizeof(common_kernel));
7803
7804     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7805                                          generic_context->enc_kernel_size,
7806                                          INTEL_GENERIC_ENC_SFD,
7807                                          0,
7808                                          &common_kernel);
7809
7810     gpe->load_kernels(ctx,
7811                       gpe_context,
7812                       &common_kernel,
7813                       1);
7814
7815 }
7816
7817 static void
7818 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7819 {
7820
7821     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7822     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7823     struct i965_gpe_table *gpe = &i965->gpe_table;
7824
7825     int i = 0;
7826
7827     gen9_avc_free_resources(vme_context);
7828
7829     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7830         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7831
7832     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7833         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7834
7835     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7836         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7837
7838     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7839         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7840
7841     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7842
7843     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7844
7845     gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
7846
7847 }
7848
7849 /*
7850 vme pipeline
7851 */
7852 static void
7853 gen9_avc_update_parameters(VADriverContextP ctx,
7854                            VAProfile profile,
7855                            struct encode_state *encode_state,
7856                            struct intel_encoder_context *encoder_context)
7857 {
7858     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7859     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7860     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7861     VAEncSequenceParameterBufferH264 *seq_param;
7862     VAEncSliceParameterBufferH264 *slice_param;
7863     VAEncMiscParameterBuffer *fei_misc_param;
7864     int i, j, slice_index;
7865     unsigned int preset = generic_state->preset;
7866     unsigned int fei_enabled = encoder_context->fei_enabled;
7867
7868     /* seq/pic/slice parameter setting */
7869     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7870     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7871
7872     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7873     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
7874
7875     if (fei_enabled &&
7876         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0] &&
7877         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer) {
7878         fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879         avc_state->fei_framectl_param =
7880             (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7881     }
7882
7883     avc_state->slice_num = 0;
7884     slice_index = 0;
7885     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888             avc_state->slice_param[slice_index] = slice_param;
7889             slice_param++;
7890             slice_index++;
7891             avc_state->slice_num++;
7892         }
7893     }
7894
7895     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896     seq_param = avc_state->seq_param;
7897     slice_param = avc_state->slice_param[0];
7898
7899     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7900
7901     if (slice_param->slice_type == SLICE_TYPE_I ||
7902         slice_param->slice_type == SLICE_TYPE_SI)
7903         generic_state->frame_type = SLICE_TYPE_I;
7904     else if (slice_param->slice_type == SLICE_TYPE_P)
7905         generic_state->frame_type = SLICE_TYPE_P;
7906     else if (slice_param->slice_type == SLICE_TYPE_B)
7907         generic_state->frame_type = SLICE_TYPE_B;
7908     if (profile == VAProfileH264High)
7909         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7910     else
7911         avc_state->transform_8x8_mode_enable = 0;
7912
7913     /* rc init*/
7914     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918         generic_state->frames_per_100s = 3000; /* 30fps */
7919     }
7920
7921     generic_state->gop_size = seq_param->intra_period;
7922     generic_state->gop_ref_distance = seq_param->ip_period;
7923
7924     if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925         generic_state->max_bit_rate = generic_state->target_bit_rate;
7926         generic_state->min_bit_rate = generic_state->target_bit_rate;
7927     }
7928
7929     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7931     }
7932
7933     generic_state->preset = encoder_context->quality_level;
7934     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935         generic_state->preset = INTEL_PRESET_RT_SPEED;
7936     }
7937     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7938
7939     if (!generic_state->brc_inited) {
7940         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7944     }
7945
7946
7947     generic_state->curr_pak_pass = 0;
7948     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7949
7950     if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951         generic_state->internal_rate_mode == VA_RC_VBR)
7952         generic_state->brc_enabled = 1;
7953     else
7954         generic_state->brc_enabled = 0;
7955
7956     if (generic_state->brc_enabled &&
7957         (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958          !generic_state->vbv_buffer_size_in_bit ||
7959          !generic_state->max_bit_rate ||
7960          !generic_state->target_bit_rate ||
7961          !generic_state->frames_per_100s)) {
7962         WARN_ONCE("Rate control parameter is required for BRC\n");
7963         generic_state->brc_enabled = 0;
7964     }
7965
7966     if (!generic_state->brc_enabled) {
7967         generic_state->target_bit_rate = 0;
7968         generic_state->max_bit_rate = 0;
7969         generic_state->min_bit_rate = 0;
7970         generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971         generic_state->vbv_buffer_size_in_bit = 0;
7972         generic_state->num_pak_passes = 1;
7973     } else {
7974         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7975     }
7976
7977
7978     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7982
7983     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
7986     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7987
7988     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
7991     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7992
7993     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
7996     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7997
7998     if (generic_state->hme_supported) {
7999         generic_state->hme_enabled = 1;
8000     } else {
8001         generic_state->hme_enabled = 0;
8002     }
8003
8004     if (generic_state->b16xme_supported) {
8005         generic_state->b16xme_enabled = 1;
8006     } else {
8007         generic_state->b16xme_enabled = 0;
8008     }
8009
8010     if (generic_state->b32xme_supported) {
8011         generic_state->b32xme_enabled = 1;
8012     } else {
8013         generic_state->b32xme_enabled = 0;
8014     }
8015     /* disable HME/16xME if the size is too small */
8016     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017         generic_state->b32xme_supported = 0;
8018         generic_state->b32xme_enabled = 0;
8019         generic_state->b16xme_supported = 0;
8020         generic_state->b16xme_enabled = 0;
8021         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8023     }
8024     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025         generic_state->b32xme_supported = 0;
8026         generic_state->b32xme_enabled = 0;
8027         generic_state->b16xme_supported = 0;
8028         generic_state->b16xme_enabled = 0;
8029         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8031     }
8032
8033     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034         generic_state->b32xme_supported = 0;
8035         generic_state->b32xme_enabled = 0;
8036         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8038     }
8039     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040         generic_state->b32xme_supported = 0;
8041         generic_state->b32xme_enabled = 0;
8042         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8044     }
8045
8046     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8049     }
8050     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8053     }
8054
8055 }
8056
8057 static VAStatus
8058 gen9_avc_encode_check_parameter(VADriverContextP ctx,
8059                                 struct encode_state *encode_state,
8060                                 struct intel_encoder_context *encoder_context)
8061 {
8062     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8063     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8064     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8065     unsigned int rate_control_mode = encoder_context->rate_control_mode;
8066     unsigned int preset = generic_state->preset;
8067     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
8068     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8069     int i = 0;
8070     int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8071     /*avbr init*/
8072     generic_state->avbr_curracy = 30;
8073     generic_state->avbr_convergence = 150;
8074
8075     switch (rate_control_mode & 0x7f) {
8076     case VA_RC_CBR:
8077         generic_state->internal_rate_mode = VA_RC_CBR;
8078         break;
8079
8080     case VA_RC_VBR:
8081         generic_state->internal_rate_mode = VA_RC_VBR;
8082         break;
8083
8084     case VA_RC_CQP:
8085     default:
8086         generic_state->internal_rate_mode = VA_RC_CQP;
8087         break;
8088     }
8089
8090     if (rate_control_mode != VA_RC_NONE &&
8091         rate_control_mode != VA_RC_CQP) {
8092         generic_state->brc_enabled = 1;
8093         generic_state->brc_distortion_buffer_supported = 1;
8094         generic_state->brc_constant_buffer_supported = 1;
8095         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
8096     }
8097
8098     /*check brc parameter*/
8099     if (generic_state->brc_enabled) {
8100         avc_state->mb_qp_data_enable = 0;
8101     }
8102
8103     /*set the brc init and reset accordingly*/
8104     if (generic_state->brc_need_reset &&
8105         (generic_state->brc_distortion_buffer_supported == 0 ||
8106          rate_control_mode == VA_RC_CQP)) {
8107         generic_state->brc_need_reset = 0;// not support by CQP
8108     }
8109     if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
8110         avc_state->sfd_enable = 0;
8111     } else {
8112         avc_state->sfd_enable = 1;
8113     }
8114
8115     if (generic_state->frames_per_window_size == 0) {
8116         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8117     } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
8118         generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8119     }
8120
8121     if (generic_state->brc_enabled) {
8122         generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
8123         if (avc_state->min_max_qp_enable) {
8124             generic_state->num_pak_passes = 1;
8125         }
8126         generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
8127         generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
8128     } else {
8129         generic_state->num_pak_passes = 1;// CQP only one pass
8130     }
8131
8132     avc_state->mbenc_i_frame_dist_in_use = 0;
8133     avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
8134
8135     /*ROI must enable mbbrc.*/
8136
8137     /*CAD check*/
8138     if (avc_state->caf_supported) {
8139         switch (generic_state->frame_type) {
8140         case SLICE_TYPE_I:
8141             avc_state->caf_enable = 0;
8142             break;
8143         case SLICE_TYPE_P:
8144             avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
8145             break;
8146         case SLICE_TYPE_B:
8147             avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
8148             break;
8149         }
8150
8151         if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
8152             if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
8153                 avc_state->caf_enable = 0;
8154         }
8155     }
8156
8157     avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
8158
8159     /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
8160     if (avc_state->flatness_check_supported) {
8161         avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
8162     } else {
8163         avc_state->flatness_check_enable = 0;
8164     }
8165
8166     /* check mb_status_supported/enbale*/
8167     if (avc_state->adaptive_transform_decision_enable) {
8168         avc_state->mb_status_enable = 1;
8169     } else {
8170         avc_state->mb_status_enable = 0;
8171     }
8172     /*slice check,all the slices use the same slice height except the last slice*/
8173     avc_state->arbitrary_num_mbs_in_slice = 0;
8174     for (i = 0; i < avc_state->slice_num; i++) {
8175         if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
8176             avc_state->arbitrary_num_mbs_in_slice = 1;
8177             avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
8178         } else {
8179             avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
8180         }
8181     }
8182
8183     if (avc_state->slice_num > 1)
8184         avc_state->arbitrary_num_mbs_in_slice = 1;
8185
8186     if (generic_state->frame_type == SLICE_TYPE_I) {
8187         generic_state->hme_enabled = 0;
8188         generic_state->b16xme_enabled = 0;
8189         generic_state->b32xme_enabled = 0;
8190     }
8191
8192     if (generic_state->frame_type == SLICE_TYPE_B) {
8193         gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
8194         avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
8195     }
8196
8197     /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
8198     avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
8199                                              && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
8200
8201     if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
8202         avc_state->tq_enable = 1;
8203         avc_state->tq_rounding = 6;
8204         if (generic_state->brc_enabled) {
8205             generic_state->mb_brc_enabled = 1;
8206         }
8207     }
8208
8209     //check the inter rounding
8210     avc_state->rounding_value = 0;
8211     avc_state->rounding_inter_p = 255;//default
8212     avc_state->rounding_inter_b = 255; //default
8213     avc_state->rounding_inter_b_ref = 255; //default
8214
8215     if (generic_state->frame_type == SLICE_TYPE_P) {
8216         if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
8217             if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
8218                 if (generic_state->gop_ref_distance == 1)
8219                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
8220                 else
8221                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
8222             } else {
8223                 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
8224             }
8225
8226         } else {
8227             avc_state->rounding_value = avc_state->rounding_inter_p;
8228         }
8229     } else if (generic_state->frame_type == SLICE_TYPE_B) {
8230         if (pic_param->pic_fields.bits.reference_pic_flag) {
8231             if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
8232                 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
8233             else
8234                 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
8235         } else {
8236             if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
8237                 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
8238                     avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
8239                 else
8240                     avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
8241             } else {
8242                 avc_state->rounding_value = avc_state->rounding_inter_b;
8243             }
8244         }
8245     }
8246     return VA_STATUS_SUCCESS;
8247 }
8248
/*
 * Bind all per-frame GPE resources needed by the VME kernels: the
 * reconstructed surface and its DMV buffers, the input YUV surface,
 * the reference surfaces, and the coded (bitstream/status) buffer.
 * Also resolves the L0/L1 reference-list indices into positions in
 * encode_state->reference_objects.
 *
 * Returns VA_STATUS_SUCCESS on success, or the failing sub-call's
 * status / VA_STATUS_ERROR_INVALID_VALUE on bad reference counts.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* make sure the private per-surface AVC data (DMV buffers etc.) exists */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface,frame_store_id,qp_value*/
        /* The last two DMV slots are reserved for the current frame's
         * top/bottom motion-vector buffers. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    /* rebind the reconstructed surface as a 2D GPE resource */
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each valid entry and record its POCs;
     * the list is assumed to be densely packed (stop at first hole). */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream ?*/
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer: shares the coded buffer's bo */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    /* NOTE(review): dri_bo_map return value is not checked here --
     * a failed map would dereference a NULL virtual pointer below. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    /* reject reference counts larger than the internal index arrays */
    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its slot in reference_objects. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
8445
8446 static VAStatus
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448                              struct encode_state *encode_state,
8449                              struct intel_encoder_context *encoder_context)
8450 {
8451     return VA_STATUS_SUCCESS;
8452 }
8453
8454 static VAStatus
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456                               struct encode_state *encode_state,
8457                               struct intel_encoder_context *encoder_context)
8458 {
8459
8460     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8463
8464     /*set this flag when all kernel is finished*/
8465     if (generic_state->brc_enabled) {
8466         generic_state->brc_inited = 1;
8467         generic_state->brc_need_reset = 0;
8468         avc_state->mbenc_curbe_set_in_brc_update = 0;
8469     }
8470     return VA_STATUS_SUCCESS;
8471 }
8472
/*
 * Dispatch the VME kernels for one frame in their required order:
 * BRC init/reset, downscaling, HME (coarsest level first), SFD,
 * BRC frame/MB update, optional weighted-prediction kernels, and
 * finally MbEnc.  The ordering between stages is load-bearing (e.g.
 * BRC init must precede HME because it resets the BRC distortion
 * surface; SFD must follow HME in the same command buffer).
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    int fei_enabled = encoder_context->fei_enabled;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling: 4x always when HME is supported, then 16x/32x nested */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel: run coarsest level first so each level can seed the next */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* optional I-frame distortion pre-pass of MbEnc */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction,disable by now */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
8564
8565 static VAStatus
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8567                       VAProfile profile,
8568                       struct encode_state *encode_state,
8569                       struct intel_encoder_context *encoder_context)
8570 {
8571     VAStatus va_status;
8572
8573     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8574
8575     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576     if (va_status != VA_STATUS_SUCCESS)
8577         return va_status;
8578
8579     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580     if (va_status != VA_STATUS_SUCCESS)
8581         return va_status;
8582
8583     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584     if (va_status != VA_STATUS_SUCCESS)
8585         return va_status;
8586
8587     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588     if (va_status != VA_STATUS_SUCCESS)
8589         return va_status;
8590
8591     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592     if (va_status != VA_STATUS_SUCCESS)
8593         return va_status;
8594
8595     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8596
8597     return VA_STATUS_SUCCESS;
8598 }
8599
8600 /* Update PreEnc specific parameters */
8601 static VAStatus
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8603                                   VAProfile profile,
8604                                   struct encode_state *encode_state,
8605                                   struct intel_encoder_context *encoder_context)
8606 {
8607     struct i965_driver_data *i965 = i965_driver_data(ctx);
8608     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613     VAStatsStatisticsParameter *stat_param = NULL;
8614     struct object_buffer *obj_buffer = NULL;
8615     struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616     struct buffer_store *buffer_store = NULL;
8617     unsigned int size = 0, i = 0;
8618     unsigned int frame_mb_nums = 0;
8619
8620     if (!encoder_context->preenc_enabled ||
8621         !encode_state->stat_param_ext ||
8622         !encode_state->stat_param_ext->buffer)
8623         return VA_STATUS_ERROR_OPERATION_FAILED;
8624
8625     stat_param_h264 = avc_state->stat_param =
8626                           (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627     stat_param = &stat_param_h264->stats_params;
8628
8629     /* Assume the frame type based on number of past/future ref frames */
8630     if (!stat_param->num_past_references && !stat_param->num_future_references)
8631         generic_state->frame_type = SLICE_TYPE_I;
8632     else if (stat_param->num_future_references > 0)
8633         generic_state->frame_type = SLICE_TYPE_B;
8634     else
8635         generic_state->frame_type = SLICE_TYPE_P;
8636
8637     generic_state->preset = INTEL_PRESET_RT_SPEED;
8638     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8639
8640     /* frame width and height */
8641     generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642     generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643     generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644     generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8645
8646     /* 4x downscaled width and height */
8647     generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
8650     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8651
8652     /* reset hme types for preenc */
8653     if (generic_state->frame_type != SLICE_TYPE_I)
8654         generic_state->hme_enabled = 1;
8655
8656     /* ensure frame width is not too small */
8657     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659         generic_state->downscaled_width_4x_in_mb =
8660             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8661     }
8662
8663     /* ensure frame height is not too small*/
8664     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666         generic_state->downscaled_height_4x_in_mb =
8667             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8668     }
8669
8670     /********** Ensure buffer object parameters ********/
8671     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8672
8673     /* mv predictor buffer */
8674     if (stat_param_h264->mv_predictor_ctrl) {
8675         if (stat_param->mv_predictor == VA_INVALID_ID)
8676             goto error;
8677         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678         obj_buffer = BUFFER(stat_param->mv_predictor);
8679         if (!obj_buffer)
8680             goto error;
8681         buffer_store = obj_buffer->buffer_store;
8682         if (buffer_store->bo->size < size)
8683             goto error;
8684         if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8685             i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8686         i965_dri_object_to_buffer_gpe_resource(
8687             &avc_ctx->preproc_mv_predictor_buffer,
8688             buffer_store->bo);
8689     }
8690
8691     /* MB qp buffer */
8692     if (stat_param_h264->mb_qp) {
8693         if (stat_param->qp == VA_INVALID_ID)
8694             goto error;
8695         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8696         obj_buffer = BUFFER(stat_param->qp);
8697         buffer_store = obj_buffer->buffer_store;
8698         if (buffer_store->bo->size < size)
8699             goto error;
8700         if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8701             i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8702         i965_dri_object_to_buffer_gpe_resource(
8703             &avc_ctx->preproc_mb_qp_buffer,
8704             buffer_store->bo);
8705     }
8706
8707     /* locate mv and stat buffer */
8708     if (!stat_param_h264->disable_mv_output ||
8709         !stat_param_h264->disable_statistics_output) {
8710
8711         if (!stat_param->outputs)
8712             goto error;
8713
8714         for (i = 0; i < 2 ; i++) {
8715             if (stat_param->outputs[i] != VA_INVALID_ID) {
8716                 obj_buffer = BUFFER(stat_param->outputs[i]);
8717                 switch (obj_buffer->type) {
8718                 case VAStatsMVBufferType:
8719                     obj_buffer_mv = obj_buffer;
8720                     break;
8721                 case VAStatsStatisticsBufferType:
8722                     obj_buffer_stat = obj_buffer;
8723                     break;
8724                 default:
8725                     assert(0);
8726                 }
8727             }
8728             if (!(!stat_param_h264->disable_mv_output &&
8729                   !stat_param_h264->disable_statistics_output))
8730                 break;
8731         }
8732     }
8733     /* mv data output buffer */
8734     if (!stat_param_h264->disable_mv_output && obj_buffer_mv) {
8735         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8736         buffer_store = obj_buffer_mv->buffer_store;
8737         if (buffer_store->bo->size < size)
8738             goto error;
8739         if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8740             i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8741         i965_dri_object_to_buffer_gpe_resource(
8742             &avc_ctx->preproc_mv_data_out_buffer,
8743             buffer_store->bo);
8744     }
8745     /* statistics output buffer */
8746     if (!stat_param_h264->disable_statistics_output && obj_buffer_stat) {
8747         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8748         buffer_store = obj_buffer_stat->buffer_store;
8749         if (buffer_store->bo->size < size)
8750             goto error;
8751         if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8752             i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8753         i965_dri_object_to_buffer_gpe_resource(
8754             &avc_ctx->preproc_stat_data_out_buffer,
8755             buffer_store->bo);
8756     }
8757
8758     /* past ref stat out buffer */
8759     if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8760         stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8761         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8762         obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8763         buffer_store = obj_buffer->buffer_store;
8764         if (buffer_store->bo->size < size)
8765             goto error;
8766         if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8767             i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8768         i965_dri_object_to_buffer_gpe_resource(
8769             &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8770             buffer_store->bo);
8771     }
8772     /* future ref stat out buffer */
8773     if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8774         stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8775         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8776         obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8777         buffer_store = obj_buffer->buffer_store;
8778         if (buffer_store->bo->size < size)
8779             goto error;
8780         if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8781             i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8782         i965_dri_object_to_buffer_gpe_resource(
8783             &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8784             buffer_store->bo);
8785     }
8786     return VA_STATUS_SUCCESS;
8787
8788 error:
8789     return VA_STATUS_ERROR_INVALID_BUFFER;
8790 }
8791
/* allocate internal resources required for PreEnc */
/*
 * Allocate the driver-internal GPE resources used by the AVC PreEnc
 * (statistics) pipeline: the 4x HME MV/distortion buffers, the BRC
 * distortion surface, the MBBRC constant LUT, the 4x-downscaled
 * current/past/future surfaces and a dummy coded buffer.
 *
 * encode_state is currently unused here; all sizing comes from
 * generic_state.  Returns VA_STATUS_SUCCESS on success or
 * VA_STATUS_ERROR_ALLOCATION_FAILED if any allocation fails.
 */
static VAStatus
gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    unsigned int width  = 0;
    unsigned int height  = 0;
    unsigned int size  = 0;
    int allocate_flag = 1;

    /* 4x MEMV data buffer (HME motion vector output).
     * Any previous buffer is freed first so the function is safe to
     * call on re-configuration. */
    width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
    height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
    i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &avc_ctx->s4x_memv_data_buffer,
                                                  width, height,
                                                  width,
                                                  "4x MEMV data buffer");
    if (!allocate_flag)
        goto failed_allocation;
    i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);

    /*  Output DISTORTION surface from 4x ME */
    width = generic_state->downscaled_width_4x_in_mb * 8;
    height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
    i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &avc_ctx->s4x_memv_distortion_buffer,
                                                  width, height,
                                                  ALIGN(width, 64),
                                                  "4x MEMV distortion buffer");
    if (!allocate_flag)
        goto failed_allocation;
    i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);

    /* output BRC DISTORTION surface from 4x ME  */
    width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
    height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
    i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &avc_ctx->res_brc_dist_data_surface,
                                                  width, height,
                                                  width,
                                                  "brc dist data buffer");
    if (!allocate_flag)
        goto failed_allocation;
    i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);


    /* FTQ LUT buffer, which is the mbbrc_const_data_buffer:
     * 16 dwords per QP for all AVC_QP_MAX QP values. */
    i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
    size = 16 * AVC_QP_MAX * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_mbbrc_const_data_buffer,
                                               ALIGN(size, 0x1000),
                                               "mbbrc const data buffer");
    if (!allocate_flag)
        goto failed_allocation;
    i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);

    /* 4x downscaled surface for the current picture; created lazily
     * and reused across frames. */
    if (!avc_ctx->preenc_scaled_4x_surface_obj) {
        i965_CreateSurfaces(ctx,
                            generic_state->frame_width_4x,
                            generic_state->frame_height_4x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_ctx->preenc_scaled_4x_surface_id);
        avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
        if (!avc_ctx->preenc_scaled_4x_surface_obj)
            goto failed_allocation;
        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /* 4x downscaled past ref surface  */
    if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
        i965_CreateSurfaces(ctx,
                            generic_state->frame_width_4x,
                            generic_state->frame_height_4x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
        avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
            SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
        if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
            goto failed_allocation;
        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /* 4x downscaled future ref surface  */
    if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
        i965_CreateSurfaces(ctx,
                            generic_state->frame_width_4x,
                            generic_state->frame_height_4x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
        avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
            SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
        if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
            goto failed_allocation;
        i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
     * the generic AVC Encode codepath which allocates the status buffer as an
     * extension to the CodedBuffer */
    if (!avc_ctx->status_buffer.bo) {
        size =
            generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
        size += I965_CODEDBUFFER_HEADER_SIZE;
        size += 0x1000;
        avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                 "Dummy Coded Buffer",
                                                 size, 64);
    }

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
8923
8924
8925 static VAStatus
8926 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8927                                struct encode_state *encode_state,
8928                                struct intel_encoder_context *encoder_context)
8929 {
8930     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8931     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8932     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8933     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8934     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8935
8936     /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8937      * to avoid repeated scaling of same surfaces */
8938
8939     /* down scaling */
8940     gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8941                                    INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8942     if (stat_param->num_past_references > 0) {
8943         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8944                                        INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8945     }
8946     if (stat_param->num_future_references > 0) {
8947         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8948                                        INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8949     }
8950
8951     /* me kernel */
8952     if (generic_state->hme_enabled) {
8953         gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8954     }
8955
8956     /* preproc kernel */
8957     if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8958         gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8959     }
8960
8961     return VA_STATUS_SUCCESS;
8962 }
8963
8964 static VAStatus
8965 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8966                          VAProfile profile,
8967                          struct encode_state *encode_state,
8968                          struct intel_encoder_context *encoder_context)
8969 {
8970     VAStatus va_status;
8971
8972     va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8973     if (va_status != VA_STATUS_SUCCESS)
8974         return va_status;
8975
8976     va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8977     if (va_status != VA_STATUS_SUCCESS)
8978         return va_status;
8979
8980     va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8981     if (va_status != VA_STATUS_SUCCESS)
8982         return va_status;
8983
8984     return VA_STATUS_SUCCESS;
8985 }
8986
8987 static void
8988 gen9_avc_vme_context_destroy(void * context)
8989 {
8990     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8991     struct generic_encoder_context *generic_ctx;
8992     struct i965_avc_encoder_context *avc_ctx;
8993     struct generic_enc_codec_state *generic_state;
8994     struct avc_enc_state *avc_state;
8995
8996     if (!vme_context)
8997         return;
8998
8999     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9000     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9001     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9002     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
9003
9004     gen9_avc_kernel_destroy(vme_context);
9005
9006     free(generic_ctx);
9007     free(avc_ctx);
9008     free(generic_state);
9009     free(avc_state);
9010     free(vme_context);
9011     return;
9012
9013 }
9014
/*
 * Gen8 kernel initialization for the AVC VME pipeline: load the
 * scaling/BRC/ME/MBEnc/SFD kernels and install the Gen8-specific CURBE
 * setters alongside the shared Gen9 surface-binding callbacks.
 * Note there is no WP or BRC-MB-update hookup on this path.
 */
static void
gen8_avc_kernel_init(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    int fei_enabled = encoder_context->fei_enabled;

    /* FEI uses a different kernel binary layout, so pick the matching
     * header/size query helper. */
    generic_ctx->get_kernel_header_and_size = fei_enabled ?
                                              intel_avc_fei_get_kernel_header_and_size :
                                              intel_avc_get_kernel_header_and_size ;
    gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
    gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
    gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
    gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
    gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);

    //function pointer: CURBE setters (gen8 variants where they differ)
    generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
    generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
    generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
    generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
    generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
    generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
    generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;

    /* surface-binding callbacks are shared with gen9 */
    generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
    generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
    generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
    generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
    generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
    generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
}
/*
 * Gen9+ kernel initialization.  Three mutually exclusive modes:
 *   - generic AVC encode (neither FEI nor PreEnc): full kernel set;
 *   - FEI encode: MBEnc kernel only, with FEI-specific callbacks;
 *   - PreEnc: scaling + ME + preproc kernels.
 * Selects platform-specific 4x-scaling CURBE setters for SKL/BXT vs
 * KBL/CNL/GLK on the generic path.
 */
static void
gen9_avc_kernel_init(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    int fei_enabled = encoder_context->fei_enabled;
    int preenc_enabled = encoder_context->preenc_enabled;

    /* FEI and PreEnc share the FEI kernel binary layout */
    generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
                                              intel_avc_fei_get_kernel_header_and_size :
                                              intel_avc_get_kernel_header_and_size ;

    if (!fei_enabled && !preenc_enabled) {
        /* generic AVC Encoder */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
        gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
        /* encoder_context->fei_enabled is 0 in this branch (same value
         * as the local fei_enabled) */
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
        gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);

        //function pointer
        generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
        generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
        generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
        generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
        generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
        generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
        generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
        generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
        generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
        generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
        generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
        generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;

        /* per-platform override of the 4x scaling CURBE setter */
        if (IS_SKL(i965->intel.device_info) ||
            IS_BXT(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        else if (IS_KBL(i965->intel.device_info) ||
                 IS_GEN10(i965->intel.device_info) ||
                 IS_GLK(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;

    } else if (fei_enabled) {
        /* FEI AVC Encoding */
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;

    } else {
        /* PreEnc for AVC */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
                                     encoder_context->preenc_enabled);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
                                encoder_context->preenc_enabled);
        gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);

        /* preenc 4x scaling uses the gen95 kernel */
        generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
        generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
        generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
    }
}
9127
9128 /*
9129 PAK pipeline related function
9130 */
9131 extern int
9132 intel_avc_enc_slice_type_fixup(int slice_type);
9133
9134 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
9135 static VAStatus
9136 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9137                                 struct encode_state *encode_state,
9138                                 struct intel_encoder_context *encoder_context)
9139 {
9140     struct i965_driver_data *i965 = i965_driver_data(ctx);
9141     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9142     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9143     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9144     unsigned int size  = 0;
9145     int allocate_flag = 1;
9146
9147     /*second level batch buffer for image state write when cqp etc*/
9148     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9149     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9150     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9151                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
9152                                                ALIGN(size, 0x1000),
9153                                                "second levle batch (image state write) buffer");
9154     if (!allocate_flag)
9155         goto failed_allocation;
9156
9157     if (!generic_state->brc_allocated) {
9158         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9159         size = 64;//44
9160         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9161                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9162                                                    ALIGN(size, 0x1000),
9163                                                    "brc pak statistic buffer");
9164         if (!allocate_flag)
9165             goto failed_allocation;
9166     }
9167
9168     return VA_STATUS_SUCCESS;
9169
9170 failed_allocation:
9171     return VA_STATUS_ERROR_ALLOCATION_FAILED;
9172 }
9173
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords) for AVC encode on the BCS ring.
 * Configures long-format encoding mode, enables PAK stream-out on all
 * but the last BRC pass, and selects pre/post-deblocking output based
 * on which output bo is allocated.
 */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable: on for every pass except the last */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
9214
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing one NV12 (planar 4:2:0,
 * interleaved U/V) tiled surface for the MFX engine.  `id` selects
 * which surface slot (e.g. reconstructed vs source) the state targets.
 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    /* NOTE(review): this DW reuses y_cb_offset for the Cr offset field —
     * presumably intentional for interleaved NV12 where Cb/Cr share a
     * plane; confirm against the PRM. */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
}
9246
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE: all surface/buffer addresses the MFX
 * engine needs for AVC PAK — pre/post deblocking outputs, the raw
 * input surface, PAK MB status (read and write), row-store scratch
 * buffers and the 16 reference picture slots.  The command is 65
 * dwords on Gen9 and 68 on Gen10 (CNL adds 3 trailing dwords).
 */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;
    unsigned int cmd_len = 65;

    if (IS_GEN10(i965->intel.device_info))
        cmd_len = 68;

    BEGIN_BCS_BATCH(batch, cmd_len);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture (read-only source) */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 dwords per slot) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) — same bo as the write slot */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /*65-67 for CNL */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
9315
/* Emit the MFX_IND_OBJ_BASE_ADDR_STATE command (26 DWs).
 *
 * Programs the indirect object base addresses used by the PAK engine:
 * the per-MB MV data produced by the VME stage (read) and the compressed
 * bitstream output buffer (write).  The IT-COFF and DBLK objects are
 * decode-only and left unset.  Returns early if the reconstructed surface
 * has no encoder private data, since the MV buffer hangs off it.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    /* The MV data surface lives in the reconstructed surface's private
     * data; without it this state cannot be programmed. */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address;
     * size is 32 * 4 bytes of MV data per MB, page-aligned as the
     * object's upper bound */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
9380
/* Emit the MFX_BSP_BUF_BASE_ADDR_STATE command (10 DWs).
 *
 * Only the BSD/MPC row store scratch buffer is needed for encoding;
 * the MPR row store and bitplane read buffers are decoder-side and
 * are programmed as NULL.
 */
static void
gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* The DW1-3 is for bsd/mpc row store scratch buffer */
    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    ADVANCE_BCS_BATCH(batch);
}
9404
/* Emit the MFX_AVC_DIRECTMODE_STATE command (71 DWs).
 *
 * Programs the direct-mode MV buffers of the reference frames (read),
 * the MV buffer of the current frame (write), and the POC list used by
 * the hardware for temporal direct-mode scaling.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference; each reloc emits a
     * 64-bit (2 DW) address, hence the step of 2.  The last two DMV
     * buffers are reserved for the current frame below. */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory object control state for the reference MV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: DW37-68 carry the reference POCs, DW69-70 the current
     * picture's top/bottom field POC */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
9452
/* Emit one MFX_QM_STATE command (18 DWs) uploading a quantizer matrix.
 *
 * @qm_type:   which matrix set to load (e.g. MFX_QM_AVC_4X4_INTRA_MATRIX)
 * @qm:        matrix data, qm_length DWs; zero-padded to the fixed 16-DW
 *             payload the command expects
 * @qm_length: number of valid DWs in @qm, at most 16
 */
static void
gen9_mfc_qm_state(VADriverContextP ctx,
                  int qm_type,
                  const unsigned int *qm,
                  int qm_length,
                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int qm_buffer[16];

    assert(qm_length <= 16);
    assert(sizeof(*qm) == 4);
    memset(qm_buffer, 0, 16 * 4);
    memcpy(qm_buffer, qm, qm_length * 4);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
}
9474
/* Upload the four AVC quantizer matrices (intra/inter 4x4 and 8x8).
 *
 * When neither the sequence nor the picture header signals custom
 * scaling matrices, flat (default) matrices are loaded; otherwise the
 * application-supplied VAIQMatrixBufferH264 scaling lists are used.
 * Note the 4x4 lists are 16 bytes (12 DWs cover three lists per command
 * as packed here) and the 8x8 lists are 64 bytes (16 DWs).
 */
static void
gen9_mfc_avc_qm_state(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;


    const unsigned int *qm_4x4_intra;
    const unsigned int *qm_4x4_inter;
    const unsigned int *qm_8x8_intra;
    const unsigned int *qm_8x8_inter;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
    } else {
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
        /* Lists 0 and 3 are the intra-Y and inter-Y 4x4 lists; 8x8 lists
         * 0 and 1 are intra-Y and inter-Y respectively. */
        qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
        qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
        qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
        qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
    }

    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
}
9509
/* Emit one MFX_FQM_STATE command (34 DWs) uploading a forward quantizer
 * matrix (16-bit reciprocal entries).
 *
 * @fqm_type:   which matrix set to load
 * @fqm:        matrix data, fqm_length DWs; zero-padded to the fixed
 *              32-DW payload the command expects
 * @fqm_length: number of valid DWs in @fqm, at most 32
 */
static void
gen9_mfc_fqm_state(VADriverContextP ctx,
                   int fqm_type,
                   const unsigned int *fqm,
                   int fqm_length,
                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int fqm_buffer[32];

    assert(fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    memset(fqm_buffer, 0, 32 * 4);
    memcpy(fqm_buffer, fqm, fqm_length * 4);

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
    ADVANCE_BCS_BATCH(batch);
}
9531
/* Derive a forward quantizer matrix from a quantizer matrix.
 *
 * Each output entry is the 16.16 fixed-point reciprocal of the
 * transposed input entry: fqm[i][j] = 65536 / qm[j][i].  A zero QM
 * entry would divide by zero, hence the assert.
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q);
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
9542
/* Upload the four AVC forward quantizer matrices.
 *
 * With no custom scaling lists, flat reciprocal matrices are loaded.
 * Otherwise each FQM is computed on the fly from the application's
 * scaling lists via gen9_mfc_fill_fqm (transposed 16.16 reciprocals):
 * three 4x4 lists per 24-DW command, one 8x8 list per 32-DW command.
 */
static void
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        uint32_t fqm[32];
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* 4x4 intra lists 0-2 (Y/Cb/Cr), 16 uint16 entries each */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* 4x4 inter lists 3-5 (Y/Cb/Cr) */
        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
9581
/* Emit an MFX_INSERT_OBJECT command copying pre-packed header bits into
 * the output bitstream.
 *
 * @insert_data:            packed data, lenght_in_dws DWs long
 * @data_bits_in_last_dw:   valid bits in the final DW; 0 means a full 32
 * @skip_emul_byte_count:   leading bytes exempt from emulation prevention
 * @is_last_header:         set on the final header before slice data
 * @is_end_of_slice:        set when this object terminates the slice
 * @emulation_flag:         let hardware insert emulation prevention bytes
 * @slice_header_indicator: mark the payload as a slice header
 */
static void
gen9_mfc_avc_insert_object(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                           int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                           int slice_header_indicator,
                           struct intel_batchbuffer *batch)
{
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* always start at offset 0 */
                  (slice_header_indicator << 14) |
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* check this flag */
    intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
9609
/* Scan the raw packed-data buffers attached to the first slice for an
 * access unit delimiter (AUD) NAL and, if one is found, insert it into
 * the bitstream before any other header.  Only the first matching AUD
 * is inserted.
 */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
        if (param->type != VAEncPackedHeaderRawData)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* the NAL unit type lives in the low 5 bits of the byte right
         * after the start code (skip_emul_byte_cnt - 1) */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
9655
/* Insert the per-slice packed data and the slice header for @slice_index.
 *
 * First all raw packed-data buffers attached to the slice are inserted
 * (excluding the AUD, which gen9_mfc_avc_insert_aud_packed_data already
 * handled, and the slice header itself, which must come last).  Then the
 * slice header is inserted: either the application-provided packed slice
 * header, or one the driver builds itself when none was supplied.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* slice_header_index == -1 means the app passed no packed slice
     * header and the driver must generate one */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        /* build_avc_slice_header allocates slice_header; freed below */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
9762
9763 static void
9764 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9765                            struct encode_state *encode_state,
9766                            struct intel_encoder_context *encoder_context,
9767                            VAEncSliceParameterBufferH264 *slice_param,
9768                            int slice_index,
9769                            struct intel_batchbuffer *batch)
9770 {
9771     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9772     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9773     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9774     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9775     unsigned int skip_emul_byte_cnt;
9776
9777     if (slice_index == 0) {
9778
9779         /* if AUD exist and insert it firstly */
9780         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9781
9782         if (encode_state->packed_header_data[idx]) {
9783             VAEncPackedHeaderParameterBuffer *param = NULL;
9784             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9785             unsigned int length_in_bits;
9786
9787             assert(encode_state->packed_header_param[idx]);
9788             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9789             length_in_bits = param->bit_length;
9790
9791             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9792             gen9_mfc_avc_insert_object(ctx,
9793                                        encoder_context,
9794                                        header_data,
9795                                        ALIGN(length_in_bits, 32) >> 5,
9796                                        length_in_bits & 0x1f,
9797                                        skip_emul_byte_cnt,
9798                                        0,
9799                                        0,
9800                                        !param->has_emulation_bytes,
9801                                        0,
9802                                        batch);
9803         }
9804
9805         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9806
9807         if (encode_state->packed_header_data[idx]) {
9808             VAEncPackedHeaderParameterBuffer *param = NULL;
9809             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9810             unsigned int length_in_bits;
9811
9812             assert(encode_state->packed_header_param[idx]);
9813             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9814             length_in_bits = param->bit_length;
9815
9816             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9817
9818             gen9_mfc_avc_insert_object(ctx,
9819                                        encoder_context,
9820                                        header_data,
9821                                        ALIGN(length_in_bits, 32) >> 5,
9822                                        length_in_bits & 0x1f,
9823                                        skip_emul_byte_cnt,
9824                                        0,
9825                                        0,
9826                                        !param->has_emulation_bytes,
9827                                        0,
9828                                        batch);
9829         }
9830
9831         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9832
9833         if (encode_state->packed_header_data[idx]) {
9834             VAEncPackedHeaderParameterBuffer *param = NULL;
9835             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9836             unsigned int length_in_bits;
9837
9838             assert(encode_state->packed_header_param[idx]);
9839             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9840             length_in_bits = param->bit_length;
9841
9842             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9843             gen9_mfc_avc_insert_object(ctx,
9844                                        encoder_context,
9845                                        header_data,
9846                                        ALIGN(length_in_bits, 32) >> 5,
9847                                        length_in_bits & 0x1f,
9848                                        skip_emul_byte_cnt,
9849                                        0,
9850                                        0,
9851                                        !param->has_emulation_bytes,
9852                                        0,
9853                                        batch);
9854         } else if (internal_rate_mode == VA_RC_CBR) {
9855             /* insert others */
9856         }
9857     }
9858
9859     gen9_mfc_avc_insert_slice_packed_data(ctx,
9860                                           encode_state,
9861                                           encoder_context,
9862                                           slice_index,
9863                                           batch);
9864 }
9865
/* Emit the MFX_AVC_SLICE_STATE command (11 DWs) for one slice.
 *
 * Derives slice geometry (current and next slice MB positions), the
 * effective reference list sizes (honouring the per-slice override
 * flag), weighted-prediction denominators, the slice QP, and the
 * rate-control flags (counter reset on later PAK passes, RC panic on
 * the last pass) before programming the command.
 *
 * @next_slice_param is NULL for the last slice of the frame.
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* convert the linear start MB address into (x, y) MB coordinates */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" position is the frame bottom */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* re-enable the RC counter only on re-encode (non-first) PAK passes */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* RC panic only on the very last PAK pass and only when min/max QP
     * clamping isn't already active */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: current and next slice start positions */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: slice data offset into the PAK-BSE indirect object */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
10014
10015 static uint8_t
10016 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10017 {
10018     unsigned int is_long_term =
10019         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10020     unsigned int is_top_field =
10021         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10022     unsigned int is_bottom_field =
10023         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
10024
10025     return ((is_long_term                         << 6) |
10026             (0 << 5) |
10027             (frame_store_id                       << 1) |
10028             ((is_top_field ^ 1) & is_bottom_field));
10029 }
10030
10031 static void
10032 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10033                            struct encode_state *encode_state,
10034                            struct intel_encoder_context *encoder_context,
10035                            VAEncSliceParameterBufferH264 *slice_param,
10036                            struct intel_batchbuffer *batch)
10037 {
10038     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10039     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10040     VAPictureH264 *ref_pic;
10041     int i, slice_type, ref_idx_shift;
10042     unsigned int fwd_ref_entry;
10043     unsigned int bwd_ref_entry;
10044
10045     /* max 4 ref frames are allowed for l0 and l1 */
10046     fwd_ref_entry = 0x80808080;
10047     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10048
10049     if ((slice_type == SLICE_TYPE_P) ||
10050         (slice_type == SLICE_TYPE_B)) {
10051         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10052             ref_pic = &slice_param->RefPicList0[i];
10053             ref_idx_shift = i * 8;
10054
10055             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10056             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
10057         }
10058     }
10059
10060     bwd_ref_entry = 0x80808080;
10061     if (slice_type == SLICE_TYPE_B) {
10062         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10063             ref_pic = &slice_param->RefPicList1[i];
10064             ref_idx_shift = i * 8;
10065
10066             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10067             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
10068         }
10069     }
10070
10071     if ((slice_type == SLICE_TYPE_P) ||
10072         (slice_type == SLICE_TYPE_B)) {
10073         BEGIN_BCS_BATCH(batch, 10);
10074         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10075         OUT_BCS_BATCH(batch, 0);                        // L0
10076         OUT_BCS_BATCH(batch, fwd_ref_entry);
10077
10078         for (i = 0; i < 7; i++) {
10079             OUT_BCS_BATCH(batch, 0x80808080);
10080         }
10081
10082         ADVANCE_BCS_BATCH(batch);
10083     }
10084
10085     if (slice_type == SLICE_TYPE_B) {
10086         BEGIN_BCS_BATCH(batch, 10);
10087         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10088         OUT_BCS_BATCH(batch, 1);                  //Select L1
10089         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
10090         for (i = 0; i < 7; i++) {
10091             OUT_BCS_BATCH(batch, 0x80808080);
10092         }
10093         ADVANCE_BCS_BATCH(batch);
10094     }
10095 }
10096
10097 static void
10098 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10099                                 struct encode_state *encode_state,
10100                                 struct intel_encoder_context *encoder_context,
10101                                 VAEncPictureParameterBufferH264 *pic_param,
10102                                 VAEncSliceParameterBufferH264 *slice_param,
10103                                 struct intel_batchbuffer *batch)
10104 {
10105     int i, slice_type;
10106     short weightoffsets[32 * 6];
10107
10108     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10109
10110     if (slice_type == SLICE_TYPE_P &&
10111         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10112         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10113         for (i = 0; i < 32; i++) {
10114             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10115             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10116             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10117             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10118             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10119             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10120         }
10121
10122         BEGIN_BCS_BATCH(batch, 98);
10123         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10124         OUT_BCS_BATCH(batch, 0);
10125         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10126
10127         ADVANCE_BCS_BATCH(batch);
10128     }
10129
10130     if (slice_type == SLICE_TYPE_B &&
10131         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10132         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10133         for (i = 0; i < 32; i++) {
10134             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10135             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10136             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10137             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10138             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10139             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10140         }
10141
10142         BEGIN_BCS_BATCH(batch, 98);
10143         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10144         OUT_BCS_BATCH(batch, 0);
10145         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10146         ADVANCE_BCS_BATCH(batch);
10147
10148         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10149         for (i = 0; i < 32; i++) {
10150             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10151             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10152             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10153             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10154             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10155             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10156         }
10157
10158         BEGIN_BCS_BATCH(batch, 98);
10159         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10160         OUT_BCS_BATCH(batch, 1);
10161         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10162         ADVANCE_BCS_BATCH(batch);
10163     }
10164 }
10165
10166 static void
10167 gen9_mfc_avc_single_slice(VADriverContextP ctx,
10168                           struct encode_state *encode_state,
10169                           struct intel_encoder_context *encoder_context,
10170                           VAEncSliceParameterBufferH264 *slice_param,
10171                           VAEncSliceParameterBufferH264 *next_slice_param,
10172                           int slice_index)
10173 {
10174     struct i965_driver_data *i965 = i965_driver_data(ctx);
10175     struct i965_gpe_table *gpe = &i965->gpe_table;
10176     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10177     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10178     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10179     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10180     struct intel_batchbuffer *batch = encoder_context->base.batch;
10181     struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
10182     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
10183     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10184     struct object_surface *obj_surface;
10185     struct gen9_surface_avc *avc_priv_surface;
10186
10187     unsigned int slice_offset = 0;
10188
10189     if (generic_state->curr_pak_pass == 0) {
10190         slice_offset = intel_batchbuffer_used_size(slice_batch);
10191         avc_state->slice_batch_offset[slice_index] = slice_offset;
10192         gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
10193         gen9_mfc_avc_weightoffset_state(ctx,
10194                                         encode_state,
10195                                         encoder_context,
10196                                         pic_param,
10197                                         slice_param,
10198                                         slice_batch);
10199         gen9_mfc_avc_slice_state(ctx,
10200                                  encode_state,
10201                                  encoder_context,
10202                                  pic_param,
10203                                  slice_param,
10204                                  next_slice_param,
10205                                  slice_batch);
10206         gen9_mfc_avc_inset_headers(ctx,
10207                                    encode_state,
10208                                    encoder_context,
10209                                    slice_param,
10210                                    slice_index,
10211                                    slice_batch);
10212
10213         BEGIN_BCS_BATCH(slice_batch, 2);
10214         OUT_BCS_BATCH(slice_batch, 0);
10215         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
10216         ADVANCE_BCS_BATCH(slice_batch);
10217
10218     } else {
10219         slice_offset = avc_state->slice_batch_offset[slice_index];
10220     }
10221     /* insert slice as second level.*/
10222     memset(&second_level_batch, 0, sizeof(second_level_batch));
10223     second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10224     second_level_batch.offset = slice_offset;
10225     second_level_batch.bo = slice_batch->buffer;
10226     gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10227
10228     /* insert mb code as second level.*/
10229     obj_surface = encode_state->reconstructed_object;
10230     assert(obj_surface->private_data);
10231     avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10232
10233     memset(&second_level_batch, 0, sizeof(second_level_batch));
10234     second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10235     second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
10236     second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
10237     gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10238
10239 }
10240
10241 static void
10242 gen9_avc_pak_slice_level(VADriverContextP ctx,
10243                          struct encode_state *encode_state,
10244                          struct intel_encoder_context *encoder_context)
10245 {
10246     struct i965_driver_data *i965 = i965_driver_data(ctx);
10247     struct i965_gpe_table *gpe = &i965->gpe_table;
10248     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10249     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10250     struct intel_batchbuffer *batch = encoder_context->base.batch;
10251     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10252     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10253     int i, j;
10254     int slice_index = 0;
10255     int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
10256     int has_tail = 0;             /* check it later */
10257
10258     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10259         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10260
10261         if (j == encode_state->num_slice_params_ext - 1)
10262             next_slice_group_param = NULL;
10263         else
10264             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
10265
10266         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10267             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10268                 next_slice_param = slice_param + 1;
10269             else
10270                 next_slice_param = next_slice_group_param;
10271
10272             gen9_mfc_avc_single_slice(ctx,
10273                                       encode_state,
10274                                       encoder_context,
10275                                       slice_param,
10276                                       next_slice_param,
10277                                       slice_index);
10278             slice_param++;
10279             slice_index++;
10280
10281             if (is_frame_level)
10282                 break;
10283         }
10284
10285         if (is_frame_level)
10286             break;
10287     }
10288
10289     if (has_tail) {
10290         /* insert a tail if required */
10291     }
10292
10293     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10294     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10295     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
10296 }
10297 static void
10298 gen9_avc_pak_picture_level(VADriverContextP ctx,
10299                            struct encode_state *encode_state,
10300                            struct intel_encoder_context *encoder_context)
10301 {
10302     struct i965_driver_data *i965 = i965_driver_data(ctx);
10303     struct i965_gpe_table *gpe = &i965->gpe_table;
10304     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10305     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10306     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10307     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10308     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10309     struct intel_batchbuffer *batch = encoder_context->base.batch;
10310
10311     if (generic_state->brc_enabled &&
10312         generic_state->curr_pak_pass) {
10313         struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
10314         struct encoder_status_buffer_internal *status_buffer;
10315         status_buffer = &(avc_ctx->status_buffer);
10316
10317         memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
10318         mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
10319         mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
10320         mi_conditional_batch_buffer_end_params.compare_data = 0;
10321         mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
10322         gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
10323     }
10324
10325     gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
10326     gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
10327     gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
10328     gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
10329     gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
10330     gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
10331
10332     if (generic_state->brc_enabled) {
10333         memset(&second_level_batch, 0, sizeof(second_level_batch));
10334         if (generic_state->curr_pak_pass == 0) {
10335             second_level_batch.offset = 0;
10336         } else {
10337             second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
10338         }
10339         second_level_batch.is_second_level = 1;
10340         second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
10341         gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10342     } else {
10343         /*generate a new image state */
10344         gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
10345         memset(&second_level_batch, 0, sizeof(second_level_batch));
10346         second_level_batch.offset = 0;
10347         second_level_batch.is_second_level = 1;
10348         second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
10349         gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10350     }
10351
10352     gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
10353     gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
10354     gen9_mfc_avc_directmode_state(ctx, encoder_context);
10355
10356 }
10357
/* Capture the MFX status registers after the PAK pass completes:
 * copy the bitstream byte counts and image status into both the
 * per-codedbuffer status area and the BRC pre-PAK statistics buffer,
 * and record the pass count for the BRC update kernel. */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    /* Flush first so the register reads observe the finished PAK work. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* image status mask, used by the conditional batch-buffer-end on re-runs */
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* dword 0: whole-frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 1: byte count excluding headers ("no header" register) */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* dword 2: number of PAK passes executed so far (immediate value) */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* dwords 4..: per-pass image status control snapshot */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* Final flush so the stores land before anything consumes them. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
10417
10418 static void
10419 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10420                          struct intel_encoder_context *encoder_context)
10421 {
10422     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10423     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10424     unsigned int rate_control_mode = encoder_context->rate_control_mode;
10425
10426     switch (rate_control_mode & 0x7f) {
10427     case VA_RC_CBR:
10428         generic_state->internal_rate_mode = VA_RC_CBR;
10429         break;
10430
10431     case VA_RC_VBR:
10432         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10433         break;
10434
10435     case VA_RC_CQP:
10436     default:
10437         generic_state->internal_rate_mode = VA_RC_CQP;
10438         break;
10439     }
10440
10441     if (encoder_context->quality_level == 0)
10442         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10443 }
10444
10445 /* allcate resources for pak only (fei mode) */
10446 static VAStatus
10447 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10448                                   VAProfile profile,
10449                                   struct encode_state *encode_state,
10450                                   struct intel_encoder_context *encoder_context)
10451 {
10452     VAStatus va_status;
10453     struct i965_driver_data *i965 = i965_driver_data(ctx);
10454     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10455     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10456     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10457     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10458     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10459     struct gen9_surface_avc *avc_priv_surface;
10460     VAEncPictureParameterBufferH264  *pic_param;
10461     VAEncSliceParameterBufferH264 *slice_param;
10462     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10463     unsigned int size = 0, i, j;
10464     unsigned int frame_mb_nums;
10465     struct object_buffer *obj_buffer = NULL;
10466     struct buffer_store *buffer_store = NULL;
10467     struct object_surface *obj_surface = NULL;
10468     struct avc_surface_param surface_param;
10469     struct i965_coded_buffer_segment *coded_buffer_segment;
10470     dri_bo *bo;
10471     unsigned char * pdata;
10472
10473     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10474
10475     pic_param = avc_state->pic_param;
10476     slice_param = avc_state->slice_param[0];
10477
10478     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10479     if (va_status != VA_STATUS_SUCCESS)
10480         return va_status;
10481
10482     va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10483     if (va_status != VA_STATUS_SUCCESS)
10484         return va_status;
10485
10486     /* Encoded bitstream ?*/
10487     obj_buffer = encode_state->coded_buf_object;
10488     bo = obj_buffer->buffer_store->bo;
10489     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10490     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10491     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10492     generic_ctx->compressed_bitstream.end_offset =
10493         ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
10494
10495     /*status buffer */
10496     dri_bo_unreference(avc_ctx->status_buffer.bo);
10497     avc_ctx->status_buffer.bo = bo;
10498     dri_bo_reference(bo);
10499
10500     /* set the internal flag to 0 to indicate the coded size is unknown */
10501     dri_bo_map(bo, 1);
10502     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10503     coded_buffer_segment->mapped = 0;
10504     coded_buffer_segment->codec = encoder_context->codec;
10505     coded_buffer_segment->status_support = 1;
10506
10507     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10508     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10509     dri_bo_unmap(bo);
10510     //frame id, it is the ref pic id in the reference_objects list.
10511     avc_state->num_refs[0] = 0;
10512     avc_state->num_refs[1] = 0;
10513     if (generic_state->frame_type == SLICE_TYPE_P) {
10514         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10515
10516         if (slice_param->num_ref_idx_active_override_flag)
10517             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10518     } else if (generic_state->frame_type == SLICE_TYPE_B) {
10519         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10520         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10521
10522         if (slice_param->num_ref_idx_active_override_flag) {
10523             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10524             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
10525         }
10526     }
10527     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10528         VAPictureH264 *va_pic;
10529
10530         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10531         avc_state->list_ref_idx[0][i] = 0;
10532
10533         if (i >= avc_state->num_refs[0])
10534             continue;
10535
10536         va_pic = &slice_param->RefPicList0[i];
10537
10538         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10539             obj_surface = encode_state->reference_objects[j];
10540
10541             if (obj_surface &&
10542                 obj_surface->bo &&
10543                 obj_surface->base.id == va_pic->picture_id) {
10544
10545                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10546                 avc_state->list_ref_idx[0][i] = j;
10547
10548                 break;
10549             }
10550         }
10551     }
10552     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10553         VAPictureH264 *va_pic;
10554
10555         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10556         avc_state->list_ref_idx[1][i] = 0;
10557
10558         if (i >= avc_state->num_refs[1])
10559             continue;
10560
10561         va_pic = &slice_param->RefPicList1[i];
10562
10563         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10564             obj_surface = encode_state->reference_objects[j];
10565
10566
10567             if (obj_surface &&
10568                 obj_surface->bo &&
10569                 obj_surface->base.id == va_pic->picture_id) {
10570
10571                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10572                 avc_state->list_ref_idx[1][i] = j;
10573
10574                 break;
10575                 break;
10576             }
10577         }
10578     }
10579
10580     obj_surface = encode_state->reconstructed_object;
10581     fei_param = avc_state->fei_framectl_param;
10582     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10583
10584     /* Setup current reconstruct frame */
10585     obj_surface = encode_state->reconstructed_object;
10586     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10587
10588     if (va_status != VA_STATUS_SUCCESS)
10589         return va_status;
10590
10591     memset(&surface_param, 0, sizeof(surface_param));
10592     surface_param.frame_width = generic_state->frame_width_in_pixel;
10593     surface_param.frame_height = generic_state->frame_height_in_pixel;
10594     va_status = gen9_avc_init_check_surfaces(ctx,
10595                                              obj_surface, encoder_context,
10596                                              &surface_param);
10597     avc_priv_surface = obj_surface->private_data;
10598
10599     /* res_mb_code_surface for MB code */
10600     /* PAK only mode must have the mb_code_surface from middleware,
10601      * so the code shouldn't reach here without an externally provided
10602      * MB Code buffer */
10603     assert(fei_param->mb_code_data != VA_INVALID_ID);
10604     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10605     obj_buffer = BUFFER(fei_param->mb_code_data);
10606     assert(obj_buffer != NULL);
10607     buffer_store = obj_buffer->buffer_store;
10608     assert(size <= buffer_store->bo->size);
10609     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10610         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10611     i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10612                                            buffer_store->bo);
10613     /* res_mv_data_surface for MV data */
10614     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10615     if (fei_param->mv_data != VA_INVALID_ID) {
10616         obj_buffer = BUFFER(fei_param->mv_data);
10617         assert(obj_buffer != NULL);
10618         buffer_store = obj_buffer->buffer_store;
10619         assert(size <= buffer_store->bo->size);
10620         if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10621             i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10622         i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10623                                                buffer_store->bo);
10624     }
10625
10626     return VA_STATUS_SUCCESS;
10627
10628 }
10629
/*
 * Prepare every surface and buffer consumed by the PAK (MFX) hardware
 * stage for the current frame: the reconstructed output surface (routed
 * pre- or post-deblocking), the raw input YUV, the reference picture
 * list with their per-field direct-MV buffers, the second-level slice
 * batch buffer and the PAK row-store scratch buffers.
 *
 * Returns VA_STATUS_SUCCESS on success, or the failing VA status
 * (surface-check or allocation error) otherwise.
 */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;

    struct gen9_surface_avc *avc_priv_surface;
    struct avc_surface_param surface_param;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size, w_mb, h_mb;

    /* PAK-only FEI mode takes its MB code / MV buffers from the
     * middleware; import them before the common preparation. */
    if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
        va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    w_mb = generic_state->frame_width_in_mbs;
    h_mb = generic_state->frame_height_in_mbs;

    /* update the parameter and check slice parameter:
     * in-loop deblocking (ILDB) is needed as soon as any slice does not
     * set disable_deblocking_filter_idc == 1 (1 == filter fully off). */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* init the member of avc_priv_surface,frame_store_id,qp_value */
    {
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        /* The last two DMV slots are reserved for the current frame's
         * top/bottom-field direct-MV buffers; rebind them each frame. */
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    /* Route the reconstructed picture to the post-deblocking output when
     * ILDB is enabled, otherwise to the pre-deblocking output. */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: import each reference picture plus its two
     * per-field direct-MV buffers, and record its POC pair. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            /* the reference list is packed, so the first hole ends it */
            break;
        }
    }

    /* Recreate the second-level batch buffer that carries the per-slice
     * PAK commands; sized at 4KB per slice parameter buffer. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }


    /* PAK row-store scratch buffers, sized per MB row (or per frame for
     * the MB status buffer). */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
10827
/*
 * Run the PAK (MFX) stage for one frame: prepare all PAK resources,
 * then submit picture-level and slice-level commands once per PAK pass
 * on the BSD ring, reading back the MFC status after each pass.
 * Multiple passes allow BRC to re-encode with adjusted parameters.
 */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Pin the work to BSD ring 0 when the platform exposes two BSD rings. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        /* latch the bitstream byte count / image status for this pass */
        gen9_avc_read_mfc_status(ctx, encoder_context);
    }

    /* the second-level slice batch buffer is per-frame; release it now */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
10885
10886 static VAStatus
10887 gen9_avc_pak_pipeline(VADriverContextP ctx,
10888                       VAProfile profile,
10889                       struct encode_state *encode_state,
10890                       struct intel_encoder_context *encoder_context)
10891 {
10892     VAStatus vaStatus;
10893
10894     switch (profile) {
10895     case VAProfileH264ConstrainedBaseline:
10896     case VAProfileH264Main:
10897     case VAProfileH264High:
10898     case VAProfileH264MultiviewHigh:
10899     case VAProfileH264StereoHigh:
10900         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10901         break;
10902
10903     default:
10904         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
10905         break;
10906     }
10907
10908     return vaStatus;
10909 }
10910
10911 static void
10912 gen9_avc_pak_context_destroy(void * context)
10913 {
10914     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10915     struct generic_encoder_context * generic_ctx;
10916     struct i965_avc_encoder_context * avc_ctx;
10917     int i = 0;
10918
10919     if (!pak_context)
10920         return;
10921
10922     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10923     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10924
10925     // other things
10926     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10927     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10928     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10929     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10930
10931     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10932     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10933     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10934     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10935     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10936
10937     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10938         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10939     }
10940
10941     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10942         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
10943     }
10944
10945     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10946         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10947         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10948     }
10949
10950 }
10951
10952 static VAStatus
10953 gen9_avc_get_coded_status(VADriverContextP ctx,
10954                           struct intel_encoder_context *encoder_context,
10955                           struct i965_coded_buffer_segment *coded_buf_seg)
10956 {
10957     struct encoder_status *avc_encode_status;
10958
10959     if (!encoder_context || !coded_buf_seg)
10960         return VA_STATUS_ERROR_INVALID_BUFFER;
10961
10962     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10963     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10964
10965     return VA_STATUS_SUCCESS;
10966 }
10967
10968 Bool
10969 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10970 {
10971     /* VME & PAK share the same context */
10972     struct i965_driver_data *i965 = i965_driver_data(ctx);
10973     struct encoder_vme_mfc_context * vme_context = NULL;
10974     struct generic_encoder_context * generic_ctx = NULL;
10975     struct i965_avc_encoder_context * avc_ctx = NULL;
10976     struct generic_enc_codec_state * generic_state = NULL;
10977     struct avc_enc_state * avc_state = NULL;
10978     struct encoder_status_buffer_internal *status_buffer;
10979     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10980
10981     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10982     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10983     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10984     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10985     avc_state = calloc(1, sizeof(struct avc_enc_state));
10986
10987     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10988         goto allocate_structure_failed;
10989
10990     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10991     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10992     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10993     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10994     memset(avc_state, 0, sizeof(struct avc_enc_state));
10995
10996     encoder_context->vme_context = vme_context;
10997     vme_context->generic_enc_ctx = generic_ctx;
10998     vme_context->private_enc_ctx = avc_ctx;
10999     vme_context->generic_enc_state = generic_state;
11000     vme_context->private_enc_state = avc_state;
11001
11002     if (IS_SKL(i965->intel.device_info) ||
11003         IS_BXT(i965->intel.device_info)) {
11004         if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11005             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11006             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11007         } else {
11008             /* FEI and PreEnc operation kernels are included in
11009             * the monolithic kernel binary */
11010             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11011             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11012         }
11013     } else if (IS_GEN8(i965->intel.device_info)) {
11014         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11015         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11016     } else if (IS_KBL(i965->intel.device_info) ||
11017                IS_GLK(i965->intel.device_info)) {
11018         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11019         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11020     } else if (IS_GEN10(i965->intel.device_info)) {
11021         generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11022         generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
11023     } else
11024         goto allocate_structure_failed;
11025
11026     /* initialize misc ? */
11027     avc_ctx->ctx = ctx;
11028     generic_ctx->use_hw_scoreboard = 1;
11029     generic_ctx->use_hw_non_stalling_scoreboard = 1;
11030
11031     /* initialize generic state */
11032
11033     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11034     generic_state->preset = INTEL_PRESET_RT_SPEED;
11035     generic_state->seq_frame_number = 0;
11036     generic_state->total_frame_number = 0;
11037     generic_state->frame_type = 0;
11038     generic_state->first_frame = 1;
11039
11040     generic_state->frame_width_in_pixel = 0;
11041     generic_state->frame_height_in_pixel = 0;
11042     generic_state->frame_width_in_mbs = 0;
11043     generic_state->frame_height_in_mbs = 0;
11044     generic_state->frame_width_4x = 0;
11045     generic_state->frame_height_4x = 0;
11046     generic_state->frame_width_16x = 0;
11047     generic_state->frame_height_16x = 0;
11048     generic_state->frame_width_32x = 0;
11049     generic_state->downscaled_width_4x_in_mb = 0;
11050     generic_state->downscaled_height_4x_in_mb = 0;
11051     generic_state->downscaled_width_16x_in_mb = 0;
11052     generic_state->downscaled_height_16x_in_mb = 0;
11053     generic_state->downscaled_width_32x_in_mb = 0;
11054     generic_state->downscaled_height_32x_in_mb = 0;
11055
11056     generic_state->hme_supported = 1;
11057     generic_state->b16xme_supported = 1;
11058     generic_state->b32xme_supported = 0;
11059     generic_state->hme_enabled = 0;
11060     generic_state->b16xme_enabled = 0;
11061     generic_state->b32xme_enabled = 0;
11062
11063     if (encoder_context->fei_enabled) {
11064         /* Disabling HME in FEI encode */
11065         generic_state->hme_supported = 0;
11066         generic_state->b16xme_supported = 0;
11067     } else if (encoder_context->preenc_enabled) {
11068         /* Disabling 16x16ME in PreEnc */
11069         generic_state->b16xme_supported = 0;
11070     }
11071
11072     generic_state->brc_distortion_buffer_supported = 1;
11073     generic_state->brc_constant_buffer_supported = 0;
11074
11075     generic_state->frame_rate = 30;
11076     generic_state->brc_allocated = 0;
11077     generic_state->brc_inited = 0;
11078     generic_state->brc_need_reset = 0;
11079     generic_state->is_low_delay = 0;
11080     generic_state->brc_enabled = 0;//default
11081     generic_state->internal_rate_mode = 0;
11082     generic_state->curr_pak_pass = 0;
11083     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11084     generic_state->is_first_pass = 1;
11085     generic_state->is_last_pass = 0;
11086     generic_state->mb_brc_enabled = 0; // enable mb brc
11087     generic_state->brc_roi_enable = 0;
11088     generic_state->brc_dirty_roi_enable = 0;
11089     generic_state->skip_frame_enbale = 0;
11090
11091     generic_state->target_bit_rate = 0;
11092     generic_state->max_bit_rate = 0;
11093     generic_state->min_bit_rate = 0;
11094     generic_state->init_vbv_buffer_fullness_in_bit = 0;
11095     generic_state->vbv_buffer_size_in_bit = 0;
11096     generic_state->frames_per_100s = 0;
11097     generic_state->gop_size = 0;
11098     generic_state->gop_ref_distance = 0;
11099     generic_state->brc_target_size = 0;
11100     generic_state->brc_mode = 0;
11101     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11102     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11103     generic_state->brc_init_reset_buf_size_in_bits = 0;
11104     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11105     generic_state->frames_per_window_size = 0;//default
11106     generic_state->target_percentage = 0;
11107
11108     generic_state->avbr_curracy = 0;
11109     generic_state->avbr_convergence = 0;
11110
11111     generic_state->num_skip_frames = 0;
11112     generic_state->size_skip_frames = 0;
11113
11114     generic_state->num_roi = 0;
11115     generic_state->max_delta_qp = 0;
11116     generic_state->min_delta_qp = 0;
11117
11118     if (encoder_context->rate_control_mode != VA_RC_NONE &&
11119         encoder_context->rate_control_mode != VA_RC_CQP) {
11120         generic_state->brc_enabled = 1;
11121         generic_state->brc_distortion_buffer_supported = 1;
11122         generic_state->brc_constant_buffer_supported = 1;
11123         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11124     }
11125     /*avc state initialization */
11126     avc_state->mad_enable = 0;
11127     avc_state->mb_disable_skip_map_enable = 0;
11128     avc_state->sfd_enable = 1;//default
11129     avc_state->sfd_mb_enable = 1;//set it true
11130     avc_state->adaptive_search_window_enable = 1;//default
11131     avc_state->mb_qp_data_enable = 0;
11132     avc_state->intra_refresh_i_enable = 0;
11133     avc_state->min_max_qp_enable = 0;
11134     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
11135
11136     //external input
11137     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11138     avc_state->ftq_skip_threshold_lut_input_enable = 0;
11139     avc_state->ftq_override = 0;
11140
11141     avc_state->direct_bias_adjustment_enable = 0;
11142     avc_state->global_motion_bias_adjustment_enable = 0;
11143     avc_state->disable_sub_mb_partion = 0;
11144     avc_state->arbitrary_num_mbs_in_slice = 0;
11145     avc_state->adaptive_transform_decision_enable = 0;//default
11146     avc_state->skip_check_disable = 0;
11147     avc_state->tq_enable = 0;
11148     avc_state->enable_avc_ildb = 0;
11149     avc_state->mbaff_flag = 0;
11150     avc_state->enable_force_skip = 1;//default
11151     avc_state->rc_panic_enable = 1;//default
11152     avc_state->suppress_recon_enable = 1;//default
11153
11154     avc_state->ref_pic_select_list_supported = 1;
11155     avc_state->mb_brc_supported = 1;//?,default
11156     avc_state->multi_pre_enable = 1;//default
11157     avc_state->ftq_enable = 1;//default
11158     avc_state->caf_supported = 1; //default
11159     avc_state->caf_enable = 0;
11160     avc_state->caf_disable_hd = 1;//default
11161     avc_state->skip_bias_adjustment_supported = 1;//default
11162
11163     avc_state->adaptive_intra_scaling_enable = 1;//default
11164     avc_state->old_mode_cost_enable = 0;//default
11165     avc_state->multi_ref_qp_enable = 1;//default
11166     avc_state->weighted_ref_l0_enable = 1;//default
11167     avc_state->weighted_ref_l1_enable = 1;//default
11168     avc_state->weighted_prediction_supported = 0;
11169     avc_state->brc_split_enable = 0;
11170     avc_state->slice_level_report_supported = 0;
11171
11172     avc_state->fbr_bypass_enable = 1;//default
11173     avc_state->field_scaling_output_interleaved = 0;
11174     avc_state->mb_variance_output_enable = 0;
11175     avc_state->mb_pixel_average_output_enable = 0;
11176     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11177     avc_state->mbenc_curbe_set_in_brc_update = 0;
11178     avc_state->rounding_inter_enable = 1; //default
11179     avc_state->adaptive_rounding_inter_enable = 1;//default
11180
11181     avc_state->mbenc_i_frame_dist_in_use = 0;
11182     avc_state->mb_status_supported = 1; //set in intialization for gen9
11183     avc_state->mb_status_enable = 0;
11184     avc_state->mb_vproc_stats_enable = 0;
11185     avc_state->flatness_check_enable = 0;
11186     avc_state->flatness_check_supported = 1;//default
11187     avc_state->block_based_skip_enable = 0;
11188     avc_state->use_widi_mbenc_kernel = 0;
11189     avc_state->kernel_trellis_enable = 0;
11190     avc_state->generic_reserved = 0;
11191
11192     avc_state->rounding_value = 0;
11193     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11194     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11195     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11196     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11197     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11198     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11199     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11200     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11201     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11202
11203     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11204     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11205     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11206
11207     avc_state->intra_refresh_qp_threshold = 0;
11208     avc_state->trellis_flag = 0;
11209     avc_state->hme_mv_cost_scaling_factor = 0;
11210     avc_state->slice_height = 1;
11211     avc_state->slice_num = 1;
11212     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11213     avc_state->bi_weight = 0;
11214
11215     avc_state->lambda_table_enable = 0;
11216
11217     if (IS_GEN8(i965->intel.device_info)) {
11218         avc_state->brc_const_data_surface_width = 64;
11219         avc_state->brc_const_data_surface_height = 44;
11220         avc_state->mb_status_supported = 0;
11221     } else if (IS_SKL(i965->intel.device_info) ||
11222                IS_BXT(i965->intel.device_info)) {
11223         avc_state->brc_const_data_surface_width = 64;
11224         avc_state->brc_const_data_surface_height = 44;
11225         avc_state->brc_split_enable = 1;
11226     } else if (IS_KBL(i965->intel.device_info) ||
11227                IS_GEN10(i965->intel.device_info) ||
11228                IS_GLK(i965->intel.device_info)) {
11229         avc_state->brc_const_data_surface_width = 64;
11230         avc_state->brc_const_data_surface_height = 53;
11231         //gen95
11232         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11233         avc_state->extended_mv_cost_range_enable = 0;
11234         avc_state->reserved_g95 = 0;
11235         avc_state->mbenc_brc_buffer_size = 128;
11236         avc_state->kernel_trellis_enable = 1;
11237         avc_state->lambda_table_enable = 1;
11238         avc_state->brc_split_enable = 1;
11239
11240         if (IS_GEN10(i965->intel.device_info))
11241             avc_state->adaptive_transform_decision_enable = 1;// CNL
11242     }
11243
11244     avc_state->num_refs[0] = 0;
11245     avc_state->num_refs[1] = 0;
11246     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11247     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11248     avc_state->tq_rounding = 0;
11249     avc_state->zero_mv_threshold = 0;
11250     avc_state->slice_second_levle_batch_buffer_in_use = 0;
11251
11252     //1. seq/pic/slice
11253
11254     /* the definition of status buffer offset for Encoder */
11255
11256     status_buffer = &avc_ctx->status_buffer;
11257     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
11258
11259     status_buffer->base_offset = base_offset;
11260     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11261     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11262     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11263     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11264     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11265     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
11266
11267     status_buffer->status_buffer_size = sizeof(struct encoder_status);
11268     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11269     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11270     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11271     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11272     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
11273
11274     if (IS_GEN8(i965->intel.device_info)) {
11275         gen8_avc_kernel_init(ctx, encoder_context);
11276     } else {
11277         gen9_avc_kernel_init(ctx, encoder_context);
11278     }
11279     encoder_context->vme_context = vme_context;
11280     /* Handling PreEnc operations separately since it gives better
11281      * code readability, avoid possible vme operations mess-up */
11282     encoder_context->vme_pipeline =
11283         !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11284     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
11285
11286     return true;
11287
11288 allocate_structure_failed:
11289
11290     free(vme_context);
11291     free(generic_ctx);
11292     free(avc_ctx);
11293     free(generic_state);
11294     free(avc_state);
11295     return false;
11296 }
11297
11298 Bool
11299 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11300 {
11301     /* VME & PAK share the same context */
11302     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11303
11304     if (!pak_context)
11305         return false;
11306
11307     encoder_context->mfc_context = pak_context;
11308     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11309     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11310     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11311     encoder_context->get_status = gen9_avc_get_coded_status;
11312     return true;
11313 }