OSDN Git Service

Add vdenc common commands for CNL
[android-x86/hardware-intel-common-vaapi.git] / src / i965_avc_encoder.c
1 /*
 * Copyright (c) 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *    Sreerenj Balachandran <sreerenj.balachandran@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
47
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
54
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1
#define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* alignment shift in log2: 4 means 16 (= 1 << 4) */
59
/* Emit a 2-DW (64-bit) graphics address into the BCS batch: a relocation
 * when bo is non-NULL (marking it as a render target when is_target is
 * set), otherwise two zero DWs as a null address. */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                        \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)
72
/* Same as OUT_BUFFER_2DW plus one attribute DW (e.g. memory-object
 * control state) following the address. */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
77
/* FEI specific buffer sizes per MB in bytes for gen9 */
#define FEI_AVC_MB_CODE_BUFFER_SIZE      64
#define FEI_AVC_MV_DATA_BUFFER_SIZE      128
#define FEI_AVC_MB_CONTROL_BUFFER_SIZE   16
#define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
#define FEI_AVC_DISTORTION_BUFFER_SIZE   48
#define FEI_AVC_QP_BUFFER_SIZE           1
#define PREENC_AVC_STATISTICS_BUFFER_SIZE 64

/* Selects which picture a scaling pass operates on.
 * NOTE(review): presumably consumed by the downscale kernel setup —
 * confirm against the scaling curbe code. */
#define SCALE_CUR_PIC        1
#define SCALE_PAST_REF_PIC   2
#define SCALE_FUTURE_REF_PIC 3
90
/* Flat quantization matrix: every weight is 16 (0x10), packed four
 * 8-bit entries per DW. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
97
/* Flat forward quantization matrix: every entry is 0x1000, packed two
 * 16-bit entries per DW. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
108
/* NOTE(review): appears to map a slice type index to a kernel ordinal
 * (values 1, 2, 0) — confirm against the MBEnc kernel table ordering. */
static const unsigned int slice_type_kernel[3] = {1, 2, 0};
110
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
112     // unsigned int 0
113     {
114         0
115     },
116
117     // unsigned int 1
118     {
119         0
120     },
121
122     // unsigned int 2
123     {
124         0
125     },
126
127     // unsigned int 3
128     {
129         0
130     },
131
132     // unsigned int 4
133     {
134         0
135     },
136
137     // unsigned int 5
138     {
139         0
140     },
141
142     // unsigned int 6
143     {
144         0
145     },
146
147     // unsigned int 7
148     {
149         0
150     },
151
152     // unsigned int 8
153     {
154         0,
155         0
156     },
157
158     // unsigned int 9
159     {
160         0,
161         0
162     },
163
164     // unsigned int 10
165     {
166         0,
167         0
168     },
169
170     // unsigned int 11
171     {
172         0,
173         1
174     },
175
176     // unsigned int 12
177     {
178         51,
179         0
180     },
181
182     // unsigned int 13
183     {
184         40,
185         60,
186         80,
187         120
188     },
189
190     // unsigned int 14
191     {
192         35,
193         60,
194         80,
195         120
196     },
197
198     // unsigned int 15
199     {
200         40,
201         60,
202         90,
203         115
204     },
205
206     // unsigned int 16
207     {
208         0,
209         0,
210         0,
211         0
212     },
213
214     // unsigned int 17
215     {
216         0,
217         0,
218         0,
219         0
220     },
221
222     // unsigned int 18
223     {
224         0,
225         0,
226         0,
227         0
228     },
229
230     // unsigned int 19
231     {
232         0,
233         0,
234         0,
235         0
236     },
237
238     // unsigned int 20
239     {
240         0,
241         0,
242         0,
243         0
244     },
245
246     // unsigned int 21
247     {
248         0,
249         0,
250         0,
251         0
252     },
253
254     // unsigned int 22
255     {
256         0,
257         0,
258         0,
259         0
260     },
261
262     // unsigned int 23
263     {
264         0
265     }
266 };
267
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
269     //unsigned int 0
270     {
271         0
272     },
273
274     //unsigned int 1
275     {
276         0
277     },
278
279     //unsigned int 2
280     {
281         0
282     },
283
284     //unsigned int 3
285     {
286
287         10,
288         50
289
290     },
291
292     //unsigned int 4
293     {
294
295         100,
296         150
297
298     },
299
300     //unsigned int 5
301     {
302         0, 0, 0, 0
303     },
304
305     //unsigned int 6
306     {
307         0, 0, 0, 0
308     },
309
310     //unsigned int 7
311     {
312         0
313     },
314
315     //unsigned int 8
316     {
317
318         1,
319         1,
320         3,
321         2
322
323     },
324
325     //unsigned int 9
326     {
327
328         1,
329         40,
330         5,
331         5
332
333     },
334
335     //unsigned int 10
336     {
337
338         3,
339         1,
340         7,
341         18
342
343     },
344
345     //unsigned int 11
346     {
347
348         25,
349         37,
350         40,
351         75
352
353     },
354
355     //unsigned int 12
356     {
357
358         97,
359         103,
360         125,
361         160
362
363     },
364
365     //unsigned int 13
366     {
367
368         -3,
369         -2,
370         -1,
371         0
372
373     },
374
375     //unsigned int 14
376     {
377
378         1,
379         2,
380         3,
381         0xff
382
383     },
384
385     //unsigned int 15
386     {
387         0, 0
388     },
389
390     //unsigned int 16
391     {
392         0, 0
393     },
394
395     //unsigned int 17
396     {
397         0, 0
398     },
399 };
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
401     // unsigned int 0
402     {
403         0
404     },
405
406     // unsigned int 1
407     {
408         0
409     },
410
411     // unsigned int 2
412     {
413         0
414     },
415
416     // unsigned int 3
417     {
418         10,
419         50
420     },
421
422     // unsigned int 4
423     {
424         100,
425         150
426     },
427
428     // unsigned int 5
429     {
430         0,
431         0,
432         0,
433         0
434     },
435
436     // unsigned int 6
437     {
438         0,
439         0,
440         0,
441         0,
442         0,
443         0
444     },
445
446     // unsigned int 7
447     {
448         0
449     },
450
451     // unsigned int 8
452     {
453         1,
454         1,
455         3,
456         2
457     },
458
459     // unsigned int 9
460     {
461         1,
462         40,
463         5,
464         5
465     },
466
467     // unsigned int 10
468     {
469         3,
470         1,
471         7,
472         18
473     },
474
475     // unsigned int 11
476     {
477         25,
478         37,
479         40,
480         75
481     },
482
483     // unsigned int 12
484     {
485         97,
486         103,
487         125,
488         160
489     },
490
491     // unsigned int 13
492     {
493         -3,
494         -2,
495         -1,
496         0
497     },
498
499     // unsigned int 14
500     {
501         1,
502         2,
503         3,
504         0xff
505     },
506
507     // unsigned int 15
508     {
509         0,
510         0,
511         0,
512         0
513     },
514
515     // unsigned int 16
516     {
517         0
518     },
519
520     // unsigned int 17
521     {
522         0
523     },
524
525     // unsigned int 18
526     {
527         0
528     },
529
530     // unsigned int 19
531     {
532         0
533     },
534
535     // unsigned int 20
536     {
537         0
538     },
539
540     // unsigned int 21
541     {
542         0
543     },
544
545     // unsigned int 22
546     {
547         0
548     },
549
550     // unsigned int 23
551     {
552         0
553     },
554
555 };
556
557 static void
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559                                 struct encode_state *encode_state,
560                                 struct intel_encoder_context *encoder_context)
561 {
562     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
564     int i;
565
566     /* brc */
567     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
568
569     generic_state->brc_need_reset = encoder_context->brc.need_reset;
570
571     if (generic_state->internal_rate_mode == VA_RC_CBR) {
572         generic_state->min_bit_rate = generic_state->max_bit_rate;
573         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
574
575         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576             generic_state->target_bit_rate = generic_state->max_bit_rate;
577             generic_state->brc_need_reset = 1;
578         }
579     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
582
583         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585             generic_state->brc_need_reset = 1;
586         }
587     }
588
589     /*  frame rate */
590     if (generic_state->internal_rate_mode != VA_RC_CQP) {
591         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
594     } else {
595         generic_state->frames_per_100s = 30 * 100;
596         generic_state->frame_rate = 30 ;
597         generic_state->frames_per_window_size = 30;
598     }
599
600     /*  HRD */
601     if (generic_state->internal_rate_mode != VA_RC_CQP) {
602         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
604     }
605
606     /* ROI */
607     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608     if (generic_state->num_roi > 0) {
609         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
611
612         for (i = 0; i < generic_state->num_roi; i++) {
613             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
614             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
615             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
616             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
618
619             generic_state->roi[i].left /= 16;
620             generic_state->roi[i].right /= 16;
621             generic_state->roi[i].top /= 16;
622             generic_state->roi[i].bottom /= 16;
623         }
624     }
625
626 }
627
628 static bool
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
630                                      int binary_size,
631                                      INTEL_GENERIC_ENC_OPERATION operation,
632                                      int krnstate_idx,
633                                      struct i965_kernel *ret_kernel)
634 {
635     typedef uint32_t BIN_PTR[4];
636
637     char *bin_start;
638     gen9_avc_encoder_kernel_header      *pkh_table;
639     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
640     int next_krnoffset;
641
642     if (!pvbinary || !ret_kernel)
643         return false;
644
645     bin_start = (char *)pvbinary;
646     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647     pinvalid_entry = &(pkh_table->static_detection) + 1;
648     next_krnoffset = binary_size;
649
650     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651         pcurr_header = &pkh_table->ply_dscale_ply;
652     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653         pcurr_header = &pkh_table->ply_2xdscale_ply;
654     } else if (operation == INTEL_GENERIC_ENC_ME) {
655         pcurr_header = &pkh_table->me_p;
656     } else if (operation == INTEL_GENERIC_ENC_BRC) {
657         pcurr_header = &pkh_table->frame_brc_init;
658     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659         pcurr_header = &pkh_table->mbenc_quality_I;
660     } else if (operation == INTEL_GENERIC_ENC_WP) {
661         pcurr_header = &pkh_table->wp;
662     } else if (operation == INTEL_GENERIC_ENC_SFD) {
663         pcurr_header = &pkh_table->static_detection;
664     } else {
665         return false;
666     }
667
668     pcurr_header += krnstate_idx;
669     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
670
671     pnext_header = (pcurr_header + 1);
672     if (pnext_header < pinvalid_entry) {
673         next_krnoffset = pnext_header->kernel_start_pointer << 6;
674     }
675     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
676
677     return true;
678 }
679
680 static bool
681 intel_avc_fei_get_kernel_header_and_size(
682     void                             *pvbinary,
683     int                              binary_size,
684     INTEL_GENERIC_ENC_OPERATION      operation,
685     int                              krnstate_idx,
686     struct i965_kernel               *ret_kernel)
687 {
688     typedef uint32_t BIN_PTR[4];
689
690     char *bin_start;
691     gen9_avc_fei_encoder_kernel_header      *pkh_table;
692     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
693     int next_krnoffset;
694
695     if (!pvbinary || !ret_kernel)
696         return false;
697
698     bin_start = (char *)pvbinary;
699     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700     pinvalid_entry = &(pkh_table->wp) + 1;
701     next_krnoffset = binary_size;
702
703     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704         pcurr_header = &pkh_table->ply_dscale_ply;
705     } else if (operation == INTEL_GENERIC_ENC_ME) {
706         pcurr_header = &pkh_table->me_p;
707     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708         pcurr_header = &pkh_table->mbenc_i;
709     } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710         pcurr_header =  &pkh_table->preproc;
711     } else {
712         return false;
713     }
714
715     pcurr_header += krnstate_idx;
716     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
717
718     pnext_header = (pcurr_header + 1);
719     if (pnext_header < pinvalid_entry) {
720         next_krnoffset = pnext_header->kernel_start_pointer << 6;
721     }
722     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
723
724     return true;
725 }
726
727 static void
728 gen9_free_surfaces_avc(void **data)
729 {
730     struct gen9_surface_avc *avc_surface;
731
732     if (!data || !*data)
733         return;
734
735     avc_surface = *data;
736
737     if (avc_surface->scaled_4x_surface_obj) {
738         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740         avc_surface->scaled_4x_surface_obj = NULL;
741     }
742
743     if (avc_surface->scaled_16x_surface_obj) {
744         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746         avc_surface->scaled_16x_surface_obj = NULL;
747     }
748
749     if (avc_surface->scaled_32x_surface_obj) {
750         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752         avc_surface->scaled_32x_surface_obj = NULL;
753     }
754
755     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
758
759     /* FEI specific resources */
760     /* since the driver previously taken an extra reference to the drm_bo
761      * in case the buffers were supplied by middleware, there shouldn't
762      * be any memory handling issue */
763     i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764     i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765     i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766     i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
767
768     dri_bo_unreference(avc_surface->dmv_top);
769     avc_surface->dmv_top = NULL;
770     dri_bo_unreference(avc_surface->dmv_bottom);
771     avc_surface->dmv_bottom = NULL;
772
773     free(avc_surface);
774
775     *data = NULL;
776
777     return;
778 }
779
/* Lazily attach the per-surface encoder private data (gen9_surface_avc)
 * to obj_surface: downscaled 4x/16x (and optionally 32x) surfaces, MB
 * code / MV data buffers, the ref-pic-select list and direct-MV bos.
 * A surface that already carries private data is returned untouched.
 * Early error returns after the private data has been attached are safe:
 * gen9_free_surfaces_avc releases whatever was allocated so far. */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* Already initialized on a previous frame — nothing to do. */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Attach before allocating so free_private_data can clean up
     * partially-built state on any failure path below. */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    /* 4x downscaled NV12 surface for HME. */
    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 16x downscaled NV12 surface for HME. */
    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 32x downscaled surface, only when 32x ME is available. */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /*mb code and mv data for each frame*/
    /* In FEI mode these buffers are supplied externally, so only
     * allocate them for the regular encode path. */
    if (!encoder_context->fei_enabled) {
        size = frame_mb_nums * 16 * 4;
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mb_code_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mb code buffer");
        if (!allocate_flag)
            goto failed_allocation;

        size = frame_mb_nums * 32 * 4;
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mv_data_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mv data buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /* ref pic list*/
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /*direct mv*/
    /* 68 bytes per MB for each field's direct-MV data. */
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    /* Partially allocated resources are released via free_private_data. */
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
925
/* Fill the MBEnc slice-map surface: one DW per macroblock holding the
 * index of the slice it belongs to, written row by row with the surface
 * pitch (in DWs), and terminated with 0xFFFFFFFF.  Only needed when
 * slices contain an arbitrary number of MBs. */
static void
gen9_avc_generate_slice_map(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    VAEncSliceParameterBufferH264 * slice_param = NULL;
    unsigned int * data = NULL;
    unsigned int * data_row = NULL;
    int i, j, count = 0;
    /* Row pitch in DWs: one extra entry per row, rounded to 64 bytes. */
    unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;

    if (!avc_state->arbitrary_num_mbs_in_slice)
        return;

    gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
    assert(data_row);

    data = data_row;
    for (i = 0; i < avc_state->slice_num; i++) {
        slice_param = avc_state->slice_param[i];
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            *data++ = i;
            /* Row boundary: advance to the next pitch-aligned row and
             * re-write this MB's slice index at its start. */
            if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
                data_row += pitch;
                data = data_row;
                *data++ = i;
            }
            count++;
        }
    }
    /* End-of-map marker consumed by the MBEnc kernel. */
    *data++ = 0xFFFFFFFF;

    i965_unmap_gpe_resource(gpe_resource);
}
971
972 static VAStatus
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974                             struct encode_state *encode_state,
975                             struct intel_encoder_context *encoder_context)
976 {
977     struct i965_driver_data *i965 = i965_driver_data(ctx);
978     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982     unsigned int size  = 0;
983     unsigned int width  = 0;
984     unsigned int height  = 0;
985     unsigned char * data  = NULL;
986     int allocate_flag = 1;
987     int i = 0;
988
989     /*all the surface/buffer are allocated here*/
990
991     /*second level batch buffer for image state write when cqp etc*/
992     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
996                                                ALIGN(size, 0x1000),
997                                                "second levle batch (image state write) buffer");
998     if (!allocate_flag)
999         goto failed_allocation;
1000
1001     /* scaling related surface   */
1002     if (avc_state->mb_status_supported) {
1003         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006                                                    &avc_ctx->res_mb_status_buffer,
1007                                                    ALIGN(size, 0x1000),
1008                                                    "MB statistics output buffer");
1009         if (!allocate_flag)
1010             goto failed_allocation;
1011         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1012     }
1013
1014     if (avc_state->flatness_check_supported) {
1015         width = generic_state->frame_width_in_mbs * 4;
1016         height = generic_state->frame_height_in_mbs * 4;
1017         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019                                                       &avc_ctx->res_flatness_check_surface,
1020                                                       width, height,
1021                                                       ALIGN(width, 64),
1022                                                       "Flatness check buffer");
1023         if (!allocate_flag)
1024             goto failed_allocation;
1025     }
1026     /* me related surface */
1027     width = generic_state->downscaled_width_4x_in_mb * 8;
1028     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031                                                   &avc_ctx->s4x_memv_distortion_buffer,
1032                                                   width, height,
1033                                                   ALIGN(width, 64),
1034                                                   "4x MEMV distortion buffer");
1035     if (!allocate_flag)
1036         goto failed_allocation;
1037     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1038
1039     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1044                                                   width, height,
1045                                                   width,
1046                                                   "4x MEMV min distortion brc buffer");
1047     if (!allocate_flag)
1048         goto failed_allocation;
1049     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1050
1051
1052     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056                                                   &avc_ctx->s4x_memv_data_buffer,
1057                                                   width, height,
1058                                                   width,
1059                                                   "4x MEMV data buffer");
1060     if (!allocate_flag)
1061         goto failed_allocation;
1062     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1063
1064
1065     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069                                                   &avc_ctx->s16x_memv_data_buffer,
1070                                                   width, height,
1071                                                   width,
1072                                                   "16x MEMV data buffer");
1073     if (!allocate_flag)
1074         goto failed_allocation;
1075     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1076
1077
1078     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                                   &avc_ctx->s32x_memv_data_buffer,
1083                                                   width, height,
1084                                                   width,
1085                                                   "32x MEMV data buffer");
1086     if (!allocate_flag)
1087         goto failed_allocation;
1088     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1089
1090
1091     if (!generic_state->brc_allocated) {
1092         /*brc related surface */
1093         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1094         size = 864;
1095         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096                                                    &avc_ctx->res_brc_history_buffer,
1097                                                    ALIGN(size, 0x1000),
1098                                                    "brc history buffer");
1099         if (!allocate_flag)
1100             goto failed_allocation;
1101
1102         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1103         size = 64;//44
1104         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106                                                    ALIGN(size, 0x1000),
1107                                                    "brc pak statistic buffer");
1108         if (!allocate_flag)
1109             goto failed_allocation;
1110
1111         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114                                                    &avc_ctx->res_brc_image_state_read_buffer,
1115                                                    ALIGN(size, 0x1000),
1116                                                    "brc image state read buffer");
1117         if (!allocate_flag)
1118             goto failed_allocation;
1119
1120         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123                                                    &avc_ctx->res_brc_image_state_write_buffer,
1124                                                    ALIGN(size, 0x1000),
1125                                                    "brc image state write buffer");
1126         if (!allocate_flag)
1127             goto failed_allocation;
1128
1129         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130         height = avc_state->brc_const_data_surface_height;
1131         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133                                                       &avc_ctx->res_brc_const_data_buffer,
1134                                                       width, height,
1135                                                       width,
1136                                                       "brc const data buffer");
1137         if (!allocate_flag)
1138             goto failed_allocation;
1139         i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140
1141         if (generic_state->brc_distortion_buffer_supported) {
1142             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                           &avc_ctx->res_brc_dist_data_surface,
1149                                                           width, height,
1150                                                           width,
1151                                                           "brc dist data buffer");
1152             if (!allocate_flag)
1153                 goto failed_allocation;
1154             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1155         }
1156
1157         if (generic_state->brc_roi_enable) {
1158             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162                                                           &avc_ctx->res_mbbrc_roi_surface,
1163                                                           width, height,
1164                                                           width,
1165                                                           "mbbrc roi buffer");
1166             if (!allocate_flag)
1167                 goto failed_allocation;
1168             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1169         }
1170
1171         /*mb qp in mb brc*/
1172         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
1177                                                       width, height,
1178                                                       width,
1179                                                       "mbbrc mb qp buffer");
1180         if (!allocate_flag)
1181             goto failed_allocation;
1182
1183         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184         size = 16 * AVC_QP_MAX * 4;
1185         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186                                                    &avc_ctx->res_mbbrc_const_data_buffer,
1187                                                    ALIGN(size, 0x1000),
1188                                                    "mbbrc const data buffer");
1189         if (!allocate_flag)
1190             goto failed_allocation;
1191
1192         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194             size = avc_state->mbenc_brc_buffer_size;
1195             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196                                                        &avc_ctx->res_mbenc_brc_buffer,
1197                                                        ALIGN(size, 0x1000),
1198                                                        "mbenc brc buffer");
1199             if (!allocate_flag)
1200                 goto failed_allocation;
1201             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1202         }
1203         generic_state->brc_allocated = 1;
1204     }
1205
1206     /*mb qp external*/
1207     if (avc_state->mb_qp_data_enable) {
1208         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212                                                       &avc_ctx->res_mb_qp_data_surface,
1213                                                       width, height,
1214                                                       width,
1215                                                       "external mb qp buffer");
1216         if (!allocate_flag)
1217             goto failed_allocation;
1218     }
1219
1220     /*     mbenc related surface. it share most of surface with other kernels     */
1221     if (avc_state->arbitrary_num_mbs_in_slice) {
1222         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223         height = generic_state->frame_height_in_mbs ;
1224         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226                                                       &avc_ctx->res_mbenc_slice_map_surface,
1227                                                       width, height,
1228                                                       width,
1229                                                       "slice map buffer");
1230         if (!allocate_flag)
1231             goto failed_allocation;
1232         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1233
1234         /*generate slice map,default one slice per frame.*/
1235     }
1236
1237     /* sfd related surface  */
1238     if (avc_state->sfd_enable) {
1239         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1240         size = 128;
1241         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242                                                    &avc_ctx->res_sfd_output_buffer,
1243                                                    size,
1244                                                    "sfd output buffer");
1245         if (!allocate_flag)
1246             goto failed_allocation;
1247         i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1248
1249         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250         size = ALIGN(52, 64);
1251         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1253                                                    size,
1254                                                    "sfd P frame cost table buffer");
1255         if (!allocate_flag)
1256             goto failed_allocation;
1257         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1258         assert(data);
1259         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1261
1262         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263         size = ALIGN(52, 64);
1264         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1266                                                    size,
1267                                                    "sfd B frame cost table buffer");
1268         if (!allocate_flag)
1269             goto failed_allocation;
1270         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1271         assert(data);
1272         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1274     }
1275
1276     /* wp related surfaces */
1277     if (avc_state->weighted_prediction_supported) {
1278         for (i = 0; i < 2 ; i++) {
1279             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1280                 continue;
1281             }
1282
1283             width = generic_state->frame_width_in_pixel;
1284             height = generic_state->frame_height_in_pixel ;
1285             i965_CreateSurfaces(ctx,
1286                                 width,
1287                                 height,
1288                                 VA_RT_FORMAT_YUV420,
1289                                 1,
1290                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1292
1293             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294                 goto failed_allocation;
1295             }
1296
1297             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1299         }
1300         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1304     }
1305
1306     /* other   */
1307
1308     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1309     size = 4 * 1;
1310     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311                                                &avc_ctx->res_mad_data_buffer,
1312                                                ALIGN(size, 0x1000),
1313                                                "MAD data buffer");
1314     if (!allocate_flag)
1315         goto failed_allocation;
1316
1317     return VA_STATUS_SUCCESS;
1318
1319 failed_allocation:
1320     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1321 }
1322
1323 static void
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1325 {
1326     if (!vme_context)
1327         return;
1328
1329     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330     VADriverContextP ctx = avc_ctx->ctx;
1331     int i = 0;
1332
1333     /* free all the surface/buffer here*/
1334     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1360
1361     for (i = 0; i < 2 ; i++) {
1362         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1366         }
1367     }
1368
1369     /* free preenc resources */
1370     i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371     i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372     i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373     i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1374
1375     i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376     i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1377
1378     i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379     avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380     avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1381
1382     i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383     avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384     avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1385
1386     i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387     avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388     avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1389 }
1390
1391 static void
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393                                  struct intel_encoder_context *encoder_context,
1394                                  struct i965_gpe_context *gpe_context,
1395                                  int media_function,
1396                                  struct gpe_media_object_parameter *param)
1397 {
1398     struct i965_driver_data *i965 = i965_driver_data(ctx);
1399     struct i965_gpe_table *gpe = &i965->gpe_table;
1400     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1402
1403     struct intel_batchbuffer *batch = encoder_context->base.batch;
1404     struct encoder_status_buffer_internal *status_buffer;
1405     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1406
1407     if (!batch)
1408         return;
1409
1410     intel_batchbuffer_start_atomic(batch, 0x1000);
1411     intel_batchbuffer_emit_mi_flush(batch);
1412
1413     status_buffer = &(avc_ctx->status_buffer);
1414     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415     mi_store_data_imm.bo = status_buffer->bo;
1416     mi_store_data_imm.offset = status_buffer->media_index_offset;
1417     mi_store_data_imm.dw0 = media_function;
1418     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1419
1420     gpe->pipeline_setup(ctx, gpe_context, batch);
1421     gpe->media_object(ctx, gpe_context, batch, param);
1422     gpe->media_state_flush(ctx, gpe_context, batch);
1423
1424     gpe->pipeline_end(ctx, gpe_context, batch);
1425
1426     intel_batchbuffer_end_atomic(batch);
1427
1428     intel_batchbuffer_flush(batch);
1429 }
1430
1431 static void
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433                                         struct intel_encoder_context *encoder_context,
1434                                         struct i965_gpe_context *gpe_context,
1435                                         int media_function,
1436                                         struct gpe_media_object_walker_parameter *param)
1437 {
1438     struct i965_driver_data *i965 = i965_driver_data(ctx);
1439     struct i965_gpe_table *gpe = &i965->gpe_table;
1440     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1442
1443     struct intel_batchbuffer *batch = encoder_context->base.batch;
1444     struct encoder_status_buffer_internal *status_buffer;
1445     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1446
1447     if (!batch)
1448         return;
1449
1450     intel_batchbuffer_start_atomic(batch, 0x1000);
1451
1452     intel_batchbuffer_emit_mi_flush(batch);
1453
1454     status_buffer = &(avc_ctx->status_buffer);
1455     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456     mi_store_data_imm.bo = status_buffer->bo;
1457     mi_store_data_imm.offset = status_buffer->media_index_offset;
1458     mi_store_data_imm.dw0 = media_function;
1459     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1460
1461     gpe->pipeline_setup(ctx, gpe_context, batch);
1462     gpe->media_object_walker(ctx, gpe_context, batch, param);
1463     gpe->media_state_flush(ctx, gpe_context, batch);
1464
1465     gpe->pipeline_end(ctx, gpe_context, batch);
1466
1467     intel_batchbuffer_end_atomic(batch);
1468
1469     intel_batchbuffer_flush(batch);
1470 }
1471
1472 static void
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474                           struct i965_gpe_context *gpe_context,
1475                           struct encoder_kernel_parameter *kernel_param)
1476 {
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478
1479     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1480
1481     gpe_context->sampler.entry_size = 0;
1482     gpe_context->sampler.max_entries = 0;
1483
1484     if (kernel_param->sampler_size) {
1485         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486         gpe_context->sampler.max_entries = 1;
1487     }
1488
1489     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1491
1492     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1496
1497     if (i965->intel.eu_total > 0)
1498         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1499     else
1500         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1501
1502     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505                                               gpe_context->vfe_state.curbe_allocation_size -
1506                                               ((gpe_context->idrt.entry_size >> 5) *
1507                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509     gpe_context->vfe_state.gpgpu_mode = 0;
1510 }
1511
1512 static void
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514                              struct encoder_scoreboard_parameter *scoreboard_param)
1515 {
1516     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1519
1520     if (scoreboard_param->walkpat_flag) {
1521         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522         gpe_context->vfe_desc5.scoreboard0.type = 1;
1523
1524         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1525         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1526
1527         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1528         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1529
1530         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1531         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1532
1533         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1534         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
1535     } else {
1536         // Scoreboard 0
1537         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1538         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1539
1540         // Scoreboard 1
1541         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1542         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1543
1544         // Scoreboard 2
1545         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1546         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1547
1548         // Scoreboard 3
1549         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1550         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
1551
1552         // Scoreboard 4
1553         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1554         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1555
1556         // Scoreboard 5
1557         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1558         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1559
1560         // Scoreboard 6
1561         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1562         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1563
1564         // Scoreboard 7
1565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1567     }
1568 }
1569 /*
1570 VME pipeline related function
1571 */
1572
1573 /*
1574 scaling kernel related function
1575 */
1576 static void
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578                              struct encode_state *encode_state,
1579                              struct i965_gpe_context *gpe_context,
1580                              struct intel_encoder_context *encoder_context,
1581                              void *param)
1582 {
1583     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584     struct scaling_param *surface_param = (struct scaling_param *)param;
1585
1586     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1587
1588     if (!curbe_cmd)
1589         return;
1590
1591     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1592
1593     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1594     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1595
1596     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1598
1599
1600     curbe_cmd->dw5.flatness_threshold = 128;
1601     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1604
1605     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606         curbe_cmd->dw7.enable_mb_variance_output ||
1607         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1609     }
1610
1611     i965_gpe_context_unmap_curbe(gpe_context);
1612     return;
1613 }
1614
1615 static void
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617                               struct encode_state *encode_state,
1618                               struct i965_gpe_context *gpe_context,
1619                               struct intel_encoder_context *encoder_context,
1620                               void *param)
1621 {
1622     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623     struct scaling_param *surface_param = (struct scaling_param *)param;
1624
1625     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1626
1627     if (!curbe_cmd)
1628         return;
1629
1630     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1631
1632     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1633     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1634
1635     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1637
1638     if (surface_param->enable_mb_flatness_check)
1639         curbe_cmd->dw5.flatness_threshold = 128;
1640     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1644
1645     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646         curbe_cmd->dw6.enable_mb_variance_output ||
1647         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1649     }
1650
1651     i965_gpe_context_unmap_curbe(gpe_context);
1652     return;
1653 }
1654
1655 static void
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657                              struct encode_state *encode_state,
1658                              struct i965_gpe_context *gpe_context,
1659                              struct intel_encoder_context *encoder_context,
1660                              void *param)
1661 {
1662     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663     struct scaling_param *surface_param = (struct scaling_param *)param;
1664
1665     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1666
1667     if (!curbe_cmd)
1668         return;
1669
1670     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1671
1672     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1673     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1674
1675     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1677
1678     i965_gpe_context_unmap_curbe(gpe_context);
1679     return;
1680 }
1681
1682 static void
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684                               struct encode_state *encode_state,
1685                               struct i965_gpe_context *gpe_context,
1686                               struct intel_encoder_context *encoder_context,
1687                               void *param)
1688 {
1689     struct scaling_param *surface_param = (struct scaling_param *)param;
1690     struct i965_driver_data *i965 = i965_driver_data(ctx);
1691     unsigned int surface_format;
1692     unsigned int res_size;
1693
1694     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1698     else
1699         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1700
1701     i965_add_2d_gpe_surface(ctx, gpe_context,
1702                             surface_param->input_surface,
1703                             0, 1, surface_format,
1704                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1705
1706     i965_add_2d_gpe_surface(ctx, gpe_context,
1707                             surface_param->output_surface,
1708                             0, 1, surface_format,
1709                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1710
1711     /*add buffer mv_proc_stat, here need change*/
1712     if (IS_GEN8(i965->intel.device_info)) {
1713         if (surface_param->mbv_proc_stat_enabled) {
1714             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1715
1716             i965_add_buffer_gpe_surface(ctx,
1717                                         gpe_context,
1718                                         surface_param->pres_mbv_proc_stat_buffer,
1719                                         0,
1720                                         res_size / 4,
1721                                         0,
1722                                         GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1723         }
1724         if (surface_param->enable_mb_flatness_check) {
1725             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726                                            surface_param->pres_flatness_check_surface,
1727                                            1,
1728                                            I965_SURFACEFORMAT_R8_UNORM,
1729                                            GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1730         }
1731     } else {
1732         if (surface_param->mbv_proc_stat_enabled) {
1733             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1734
1735             i965_add_buffer_gpe_surface(ctx,
1736                                         gpe_context,
1737                                         surface_param->pres_mbv_proc_stat_buffer,
1738                                         0,
1739                                         res_size / 4,
1740                                         0,
1741                                         GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742         } else if (surface_param->enable_mb_flatness_check) {
1743             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744                                            surface_param->pres_flatness_check_surface,
1745                                            1,
1746                                            I965_SURFACEFORMAT_R8_UNORM,
1747                                            GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1748         }
1749     }
1750     return;
1751 }
1752
1753 static VAStatus
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755                         struct encode_state *encode_state,
1756                         struct intel_encoder_context *encoder_context,
1757                         int hme_type)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct i965_gpe_table *gpe = &i965->gpe_table;
1761     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1766
1767     struct i965_gpe_context *gpe_context;
1768     struct scaling_param surface_param;
1769     struct object_surface *obj_surface;
1770     struct gen9_surface_avc *avc_priv_surface;
1771     struct gpe_media_object_walker_parameter media_object_walker_param;
1772     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774     int media_function = 0;
1775     int kernel_idx = 0;
1776
1777     obj_surface = encode_state->reconstructed_object;
1778     avc_priv_surface = obj_surface->private_data;
1779
1780     memset(&surface_param, 0, sizeof(struct scaling_param));
1781     switch (hme_type) {
1782     case INTEL_ENC_HME_4x : {
1783         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1787
1788         surface_param.input_surface = encode_state->input_yuv_object ;
1789         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1791
1792         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793         surface_param.output_frame_width = generic_state->frame_width_4x ;
1794         surface_param.output_frame_height = generic_state->frame_height_4x ;
1795
1796         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1799
1800         surface_param.blk8x8_stat_enabled = 0 ;
1801         surface_param.use_4x_scaling  = 1 ;
1802         surface_param.use_16x_scaling = 0 ;
1803         surface_param.use_32x_scaling = 0 ;
1804         break;
1805     }
1806     case INTEL_ENC_HME_16x : {
1807         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1811
1812         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813         surface_param.input_frame_width = generic_state->frame_width_4x ;
1814         surface_param.input_frame_height = generic_state->frame_height_4x ;
1815
1816         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817         surface_param.output_frame_width = generic_state->frame_width_16x ;
1818         surface_param.output_frame_height = generic_state->frame_height_16x ;
1819
1820         surface_param.enable_mb_flatness_check = 0 ;
1821         surface_param.enable_mb_variance_output = 0 ;
1822         surface_param.enable_mb_pixel_average_output = 0 ;
1823
1824         surface_param.blk8x8_stat_enabled = 0 ;
1825         surface_param.use_4x_scaling  = 0 ;
1826         surface_param.use_16x_scaling = 1 ;
1827         surface_param.use_32x_scaling = 0 ;
1828
1829         break;
1830     }
1831     case INTEL_ENC_HME_32x : {
1832         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1836
1837         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838         surface_param.input_frame_width = generic_state->frame_width_16x ;
1839         surface_param.input_frame_height = generic_state->frame_height_16x ;
1840
1841         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842         surface_param.output_frame_width = generic_state->frame_width_32x ;
1843         surface_param.output_frame_height = generic_state->frame_height_32x ;
1844
1845         surface_param.enable_mb_flatness_check = 0 ;
1846         surface_param.enable_mb_variance_output = 0 ;
1847         surface_param.enable_mb_pixel_average_output = 0 ;
1848
1849         surface_param.blk8x8_stat_enabled = 0 ;
1850         surface_param.use_4x_scaling  = 0 ;
1851         surface_param.use_16x_scaling = 0 ;
1852         surface_param.use_32x_scaling = 1 ;
1853         break;
1854     }
1855     default :
1856         assert(0);
1857
1858     }
1859
1860     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1861
1862     gpe->context_init(ctx, gpe_context);
1863     gpe->reset_binding_table(ctx, gpe_context);
1864
1865     if (surface_param.use_32x_scaling) {
1866         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1867     } else {
1868         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1869     }
1870
1871     if (surface_param.use_32x_scaling) {
1872         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1874     } else {
1875         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1877     }
1878
1879     if (surface_param.use_4x_scaling) {
1880         if (avc_state->mb_status_supported) {
1881             surface_param.enable_mb_flatness_check = 0;
1882             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1884
1885         } else {
1886             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887             surface_param.mbv_proc_stat_enabled = 0 ;
1888             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1889         }
1890     }
1891
1892     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1893
1894     /* setup the interface data */
1895     gpe->setup_interface_data(ctx, gpe_context);
1896
1897     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898     if (surface_param.use_32x_scaling) {
1899         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1901     } else {
1902         /* the scaling is based on 8x8 blk level */
1903         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1905     }
1906     kernel_walker_param.no_dependency = 1;
1907
1908     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1909
1910     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1911                                             gpe_context,
1912                                             media_function,
1913                                             &media_object_walker_param);
1914
1915     return VA_STATUS_SUCCESS;
1916 }
1917
1918 /*
1919 frame/mb brc related function
1920 */
/*
 * Fill a Gen9 MFX_AVC_IMG_STATE command from the current sequence and
 * picture parameters.
 *
 * This programs the per-frame defaults used by the first PAK pass
 * (progressive frames only: image_structure/field_picture_flag are 0).
 * Multi-pass/BRC code later patches individual DW4/DW5 fields per pass,
 * and the per-slice fields (DW19/DW20) are filled elsewhere.
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; length is in dwords minus the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1/DW2: frame geometry in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: prediction weighting and QP offsets from the picture params. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: per-frame coding tool flags mirrored from the SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: multi-pass/repak control; conformance checks enabled,
     * rate-control fields patched per pass by the set_image_state paths. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) requires CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB size limits — presumably in bits; TODO confirm units
     * against the MFX_AVC_IMG_STATE description in the PRM. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-slice delta-QP clamps, unused here (all zero). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max set to the field's largest
     * encodable value, deltas disabled. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
2011
/*
 * Fill a Gen8 MFX_AVC_IMG_STATE command from the current sequence and
 * picture parameters.
 *
 * Gen8 counterpart of gen9_avc_init_mfx_avc_img_state: the DW0 field
 * names differ, DW3 carries inter/intra MB conformance flags instead of
 * the BRC-domain bit, and there is no DW12 VAD field.  Per-pass fields
 * (DW3/DW4/DW5) are patched later by gen8_avc_set_image_state; per-slice
 * fields (DW19/DW20) are filled elsewhere.
 */
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; length is in dwords minus the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1/DW2: frame geometry in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: weighting, QP offsets and MB conformance flags (the latter
     * enabled only on non-first PAK passes). */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: per-frame coding tool flags mirrored from the SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: multi-pass/repak control; conformance checks enabled,
     * rate-control fields patched per pass by gen8_avc_set_image_state. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization (AQ) requires CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB size limits — presumably in bits; TODO confirm units
     * against the MFX_AVC_IMG_STATE description in the PRM. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-slice delta-QP clamps, unused here (all zero). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max set to the field's largest
     * encodable value, deltas disabled. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set paramters DW19/DW20 for slices */
}
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102                               struct encode_state *encode_state,
2103                               struct intel_encoder_context *encoder_context,
2104                               struct i965_gpe_resource *gpe_resource)
2105 {
2106     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2108     char *pdata;
2109     int i;
2110     unsigned int * data;
2111     struct gen9_mfx_avc_img_state cmd;
2112
2113     pdata = i965_map_gpe_resource(gpe_resource);
2114
2115     if (!pdata)
2116         return;
2117
2118     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119     for (i = 0; i < generic_state->num_pak_passes; i++) {
2120
2121         if (i == 0) {
2122             cmd.dw4.macroblock_stat_enable = 0;
2123             cmd.dw5.non_first_pass_flag = 0;
2124         } else {
2125             cmd.dw4.macroblock_stat_enable = 1;
2126             cmd.dw5.non_first_pass_flag = 1;
2127             cmd.dw5.intra_mb_ipcm_flag = 1;
2128
2129         }
2130         cmd.dw5.mb_rate_ctrl_flag = 0;
2131         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133         *data = MI_BATCH_BUFFER_END;
2134
2135         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2136     }
2137     i965_unmap_gpe_resource(gpe_resource);
2138     return;
2139 }
2140
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142                               struct encode_state *encode_state,
2143                               struct intel_encoder_context *encoder_context,
2144                               struct i965_gpe_resource *gpe_resource)
2145 {
2146     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2148     char *pdata;
2149     int i;
2150     unsigned int * data;
2151     struct gen8_mfx_avc_img_state cmd;
2152
2153     pdata = i965_map_gpe_resource(gpe_resource);
2154
2155     if (!pdata)
2156         return;
2157
2158     gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159     for (i = 0; i < generic_state->num_pak_passes; i++) {
2160
2161         if (i == 0) {
2162             cmd.dw4.macroblock_stat_enable = 0;
2163             cmd.dw5.non_first_pass_flag = 0;
2164         } else {
2165             cmd.dw4.macroblock_stat_enable = 1;
2166             cmd.dw5.non_first_pass_flag = 1;
2167             cmd.dw5.intra_mb_ipcm_flag = 1;
2168             cmd.dw3.inter_mb_conf_flag = 1;
2169             cmd.dw3.intra_mb_conf_flag = 1;
2170         }
2171         cmd.dw5.mb_rate_ctrl_flag = 0;
2172         memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173         data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174         *data = MI_BATCH_BUFFER_END;
2175
2176         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2177     }
2178     i965_unmap_gpe_resource(gpe_resource);
2179     return;
2180 }
2181
2182 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
2183                                       struct encode_state *encode_state,
2184                                       struct intel_encoder_context *encoder_context,
2185                                       struct i965_gpe_resource *gpe_resource)
2186 {
2187     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2188     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2189     char *pdata;
2190
2191     unsigned int * data;
2192     struct gen9_mfx_avc_img_state cmd;
2193
2194     pdata = i965_map_gpe_resource(gpe_resource);
2195
2196     if (!pdata)
2197         return;
2198
2199     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2200
2201     if (generic_state->curr_pak_pass == 0) {
2202         cmd.dw4.macroblock_stat_enable = 0;
2203         cmd.dw5.non_first_pass_flag = 0;
2204
2205     } else {
2206         cmd.dw4.macroblock_stat_enable = 1;
2207         cmd.dw5.non_first_pass_flag = 0;
2208         cmd.dw5.intra_mb_ipcm_flag = 1;
2209     }
2210
2211     cmd.dw5.mb_rate_ctrl_flag = 0;
2212     memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2213     data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2214     *data = MI_BATCH_BUFFER_END;
2215
2216     i965_unmap_gpe_resource(gpe_resource);
2217     return;
2218 }
2219
/*
 * Build the Gen9.5 trellis-quantization lambda table for the current
 * frame type.
 *
 * The table is AVC_QP_MAX rows x 2 columns of packed dwords: the intra
 * cost lives in the high 16 bits of each entry and the inter cost in
 * the low 16 bits.  The per-slice-type defaults are copied into
 * avc_state->lamda_value_lut and then patched: sentinel values (0xfffa
 * intra, 0xffef inter) become 0xf000 + rounding, where the inter
 * rounding comes from the user-supplied value or the preset tables.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* Table size in bytes: AVC_QP_MAX entries x 2 columns of dwords. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            /* Split the packed entry: intra in the high half,
             * inter in the low half. */
            value = *(lambda_table + i * 2 + col);
            intra = value >> 16;

            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    /* Sentinel: substitute the default intra TQ rounding. */
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                if (inter == 0xffef) {
                    /* Sentinel: pick the inter rounding for this frame
                     * type (user override or preset default). */
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): this rewrite applies to EVERY inter value
                 * below GEN95_AVC_MAX_LAMBDA, not just the 0xffef sentinel,
                 * and rounding_value persists across iterations (0 for
                 * I-frames).  This matches upstream — confirm intent before
                 * changing. */
                inter = 0xf000 + rounding_value;
            }
            /* Re-pack the patched halves into the table entry. */
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
2297
/*
 * Fill the BRC constant data buffer that the gen9 BRC kernels read.
 *
 * The buffer is a packed blob of consecutive tables; each section below
 * copies one table and then advances `data` by that table's size, so the
 * section order and the hard-coded sizes must match the layout the kernel
 * expects:
 *   1. QP adjustment / distortion threshold / max-frame threshold /
 *      distortion-QP adjustment tables
 *   2. skip-threshold table                       (128 bytes)
 *   3. QP/ref-index values for the ref lists      (32+32+32+160 bytes)
 *   4. MV cost + mode cost tables                 (1664 bytes)
 *   5. ref cost table                             (128 bytes)
 *   6. intra scaling factors                      (64 bytes)
 *   7. KBL/GLK/GEN10 only: lambda table (512 bytes) + FTQ25 table (64 bytes)
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Clear the whole buffer first: sections that are skipped for some
     * frame types (e.g. the skip table for I frames) stay zero. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B column of the per-slice-type tables. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Application-supplied non-FTQ skip thresholds override the odd bytes
     * (one per QP) of the table copied above. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes for list 0 entries, 32 bytes gap, 32 bytes for
     * list 1 entries, 160 bytes reserved.  0xff marks unused entries. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            /* NOTE(review): stores the internal ref index, not a QP —
             * original author flagged this with "//?"; confirm against the
             * kernel's expectation. */
            *(data + i) = avc_state->list_ref_idx[0][i];
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* B frames fill list 1 first (at offset 64), then list 0. */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* Patch byte 3 of every 16-byte entry with the legacy intra mode
         * cost.  NOTE(review): the int value is truncated to one byte by
         * this store — confirm that is intentional. */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Application-supplied FTQ skip thresholds overwrite bytes 24, 25 and
     * 27-31 of each 32-byte per-QP cost entry. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5-class hardware appends two extra tables. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2455
/*
 * Legacy variant of gen9_avc_init_brc_const_data(): fills the same BRC
 * constant buffer but from the gen75 tables, and leaves the
 * reference-list QP section zeroed (it only advances past it).
 *
 * NOTE(review): `data` is declared `unsigned int *` while every size and
 * offset below is a byte count copied from the unsigned char variant
 * above — `data += size` therefore advances size * 4 bytes, and the
 * per-QP offsets address dwords rather than bytes.  This looks
 * inconsistent with gen9_avc_init_brc_const_data(); confirm against the
 * legacy gen75 buffer layout before changing it.
 */
static void
gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Zero the buffer so skipped sections (ref-list QPs, I-frame skip
     * table) stay zero. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the I/P/B column of the per-slice-type tables. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Application-supplied non-FTQ skip thresholds override table entries. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Section intentionally left zeroed; only skip past it. */
    size = 128;
    data += size;
    size = 128;
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    if (avc_state->old_mode_cost_enable) {
        /* Patch entry 3 of each 16-element stride with the legacy intra
         * mode cost. */
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Application-supplied FTQ skip thresholds overwrite the per-QP
     * entries at offsets 24, 25 and 27-31 of each 32-element stride. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));

    i965_unmap_gpe_resource(gpe_resource);
}
2550 static void
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2555                                   void * param)
2556 {
2557     gen9_avc_brc_init_reset_curbe_data *cmd;
2558     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561     double input_bits_per_frame = 0;
2562     double bps_ratio = 0;
2563     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564     struct avc_param common_param;
2565
2566     cmd = i965_gpe_context_map_curbe(gpe_context);
2567
2568     if (!cmd)
2569         return;
2570
2571     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2572
2573     memset(&common_param, 0, sizeof(common_param));
2574     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578     common_param.frames_per_100s = generic_state->frames_per_100s;
2579     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580     common_param.target_bit_rate = generic_state->target_bit_rate;
2581
2582     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2585     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2587     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591     cmd->dw12.no_slices = avc_state->slice_num;
2592
2593     //VUI
2594     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2596         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2598
2599         }
2600
2601     }
2602     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2603     cmd->dw7.frame_rate_d = 100;
2604     cmd->dw8.brc_flag = 0;
2605     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2606
2607
2608     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2609         //CBR
2610         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2612
2613     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2614         //VBR
2615         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2617         }
2618         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2619
2620     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2621         //AVBR
2622         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2624
2625     }
2626     //igonre icq/vcm/qvbr
2627
2628     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2630
2631     //frame bits
2632     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2633
2634     if (cmd->dw2.buf_size_in_bits == 0) {
2635         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2636     }
2637
2638     if (cmd->dw1.init_buf_full_in_bits == 0) {
2639         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2640     }
2641     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2643     }
2644     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2646     }
2647
2648     //AVBR
2649     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2651         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2652
2653     }
2654
2655     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2657
2658
2659     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2664     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2666     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2667     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2680     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2681     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2683
2684     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2685
2686     i965_gpe_context_unmap_curbe(gpe_context);
2687
2688     return;
2689 }
2690
2691 static void
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693                                      struct encode_state *encode_state,
2694                                      struct i965_gpe_context *gpe_context,
2695                                      struct intel_encoder_context *encoder_context,
2696                                      void * param_mbenc)
2697 {
2698     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2700
2701     i965_add_buffer_gpe_surface(ctx,
2702                                 gpe_context,
2703                                 &avc_ctx->res_brc_history_buffer,
2704                                 0,
2705                                 avc_ctx->res_brc_history_buffer.size,
2706                                 0,
2707                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2708
2709     i965_add_buffer_2d_gpe_surface(ctx,
2710                                    gpe_context,
2711                                    &avc_ctx->res_brc_dist_data_surface,
2712                                    1,
2713                                    I965_SURFACEFORMAT_R8_UNORM,
2714                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2715
2716     return;
2717 }
2718
2719 static VAStatus
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721                                struct encode_state *encode_state,
2722                                struct intel_encoder_context *encoder_context)
2723 {
2724     struct i965_driver_data *i965 = i965_driver_data(ctx);
2725     struct i965_gpe_table *gpe = &i965->gpe_table;
2726     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2730
2731     struct i965_gpe_context *gpe_context;
2732     struct gpe_media_object_parameter media_object_param;
2733     struct gpe_media_object_inline_data media_object_inline_data;
2734     int media_function = 0;
2735     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2736
2737     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2738
2739     if (generic_state->brc_inited)
2740         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2741
2742     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2743
2744     gpe->context_init(ctx, gpe_context);
2745     gpe->reset_binding_table(ctx, gpe_context);
2746
2747     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2748
2749     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2750
2751     gpe->setup_interface_data(ctx, gpe_context);
2752
2753     memset(&media_object_param, 0, sizeof(media_object_param));
2754     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755     media_object_param.pinline_data = &media_object_inline_data;
2756     media_object_param.inline_size = sizeof(media_object_inline_data);
2757
2758     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2759                                      gpe_context,
2760                                      media_function,
2761                                      &media_object_param);
2762
2763     return VA_STATUS_SUCCESS;
2764 }
2765
2766 static void
2767 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2768                                     struct encode_state *encode_state,
2769                                     struct i965_gpe_context *gpe_context,
2770                                     struct intel_encoder_context *encoder_context,
2771                                     void * param)
2772 {
2773     gen9_avc_frame_brc_update_curbe_data *cmd;
2774     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2775     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2777     struct object_surface *obj_surface;
2778     struct gen9_surface_avc *avc_priv_surface;
2779     struct avc_param common_param;
2780     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2781
2782     obj_surface = encode_state->reconstructed_object;
2783
2784     if (!obj_surface || !obj_surface->private_data)
2785         return;
2786     avc_priv_surface = obj_surface->private_data;
2787
2788     cmd = i965_gpe_context_map_curbe(gpe_context);
2789
2790     if (!cmd)
2791         return;
2792
2793     memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
2794
2795     cmd->dw5.target_size_flag = 0 ;
2796     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2797         /*overflow*/
2798         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2799         cmd->dw5.target_size_flag = 1 ;
2800     }
2801
2802     if (generic_state->skip_frame_enbale) {
2803         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2804         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2805
2806         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2807
2808     }
2809     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2810     cmd->dw1.frame_number = generic_state->seq_frame_number ;
2811     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2812     cmd->dw5.cur_frame_type = generic_state->frame_type ;
2813     cmd->dw5.brc_flag = 0 ;
2814     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2815
2816     if (avc_state->multi_pre_enable) {
2817         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2818         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2819     }
2820
2821     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
2822     if (avc_state->min_max_qp_enable) {
2823         switch (generic_state->frame_type) {
2824         case SLICE_TYPE_I:
2825             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2826             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2827             break;
2828         case SLICE_TYPE_P:
2829             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2830             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2831             break;
2832         case SLICE_TYPE_B:
2833             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2834             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2835             break;
2836         }
2837     } else {
2838         cmd->dw6.minimum_qp = 0 ;
2839         cmd->dw6.maximum_qp = 0 ;
2840     }
2841     cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2842     cmd->dw6.enable_sliding_window = 0 ;
2843
2844     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
2845
2846     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2847         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
2848         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
2849         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
2850         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
2851         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2852         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2853         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2854         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2855         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2856         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2857
2858     }
2859     cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2860
2861     memset(&common_param, 0, sizeof(common_param));
2862     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2863     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2864     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2865     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2866     common_param.frames_per_100s = generic_state->frames_per_100s;
2867     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2868     common_param.target_bit_rate = generic_state->target_bit_rate;
2869
2870     cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2871     i965_gpe_context_unmap_curbe(gpe_context);
2872
2873     return;
2874 }
2875
2876 static void
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878                                        struct encode_state *encode_state,
2879                                        struct i965_gpe_context *gpe_context,
2880                                        struct intel_encoder_context *encoder_context,
2881                                        void * param_brc)
2882 {
2883     struct i965_driver_data *i965 = i965_driver_data(ctx);
2884     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886     struct brc_param * param = (struct brc_param *)param_brc ;
2887     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2888     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889     unsigned char is_g95 = 0;
2890
2891     if (IS_SKL(i965->intel.device_info) ||
2892         IS_BXT(i965->intel.device_info) ||
2893         IS_GEN8(i965->intel.device_info))
2894         is_g95 = 0;
2895     else if (IS_KBL(i965->intel.device_info) ||
2896              IS_GEN10(i965->intel.device_info) ||
2897              IS_GLK(i965->intel.device_info))
2898         is_g95 = 1;
2899
2900     /* brc history buffer*/
2901     i965_add_buffer_gpe_surface(ctx,
2902                                 gpe_context,
2903                                 &avc_ctx->res_brc_history_buffer,
2904                                 0,
2905                                 avc_ctx->res_brc_history_buffer.size,
2906                                 0,
2907                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2908
2909     /* previous pak buffer*/
2910     i965_add_buffer_gpe_surface(ctx,
2911                                 gpe_context,
2912                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2913                                 0,
2914                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2915                                 0,
2916                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2917
2918     /* image state command buffer read only*/
2919     i965_add_buffer_gpe_surface(ctx,
2920                                 gpe_context,
2921                                 &avc_ctx->res_brc_image_state_read_buffer,
2922                                 0,
2923                                 avc_ctx->res_brc_image_state_read_buffer.size,
2924                                 0,
2925                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2926
2927     /* image state command buffer write only*/
2928     i965_add_buffer_gpe_surface(ctx,
2929                                 gpe_context,
2930                                 &avc_ctx->res_brc_image_state_write_buffer,
2931                                 0,
2932                                 avc_ctx->res_brc_image_state_write_buffer.size,
2933                                 0,
2934                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2935
2936     if (avc_state->mbenc_brc_buffer_size > 0) {
2937         i965_add_buffer_gpe_surface(ctx,
2938                                     gpe_context,
2939                                     &(avc_ctx->res_mbenc_brc_buffer),
2940                                     0,
2941                                     avc_ctx->res_mbenc_brc_buffer.size,
2942                                     0,
2943                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2944     } else {
2945         /*  Mbenc curbe input buffer */
2946         gen9_add_dri_buffer_gpe_surface(ctx,
2947                                         gpe_context,
2948                                         gpe_context_mbenc->dynamic_state.bo,
2949                                         0,
2950                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2951                                         gpe_context_mbenc->curbe.offset,
2952                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953         /* Mbenc curbe output buffer */
2954         gen9_add_dri_buffer_gpe_surface(ctx,
2955                                         gpe_context,
2956                                         gpe_context_mbenc->dynamic_state.bo,
2957                                         0,
2958                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2959                                         gpe_context_mbenc->curbe.offset,
2960                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2961     }
2962
2963     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964     i965_add_buffer_2d_gpe_surface(ctx,
2965                                    gpe_context,
2966                                    &avc_ctx->res_brc_dist_data_surface,
2967                                    1,
2968                                    I965_SURFACEFORMAT_R8_UNORM,
2969                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2970
2971     /* BRC const data 2D surface buffer */
2972     i965_add_buffer_2d_gpe_surface(ctx,
2973                                    gpe_context,
2974                                    &avc_ctx->res_brc_const_data_buffer,
2975                                    1,
2976                                    I965_SURFACEFORMAT_R8_UNORM,
2977                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2978
2979     /* MB statistical data surface*/
2980     if (!IS_GEN8(i965->intel.device_info)) {
2981         i965_add_buffer_gpe_surface(ctx,
2982                                     gpe_context,
2983                                     &avc_ctx->res_mb_status_buffer,
2984                                     0,
2985                                     avc_ctx->res_mb_status_buffer.size,
2986                                     0,
2987                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2988     } else {
2989         i965_add_buffer_2d_gpe_surface(ctx,
2990                                        gpe_context,
2991                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2992                                        1,
2993                                        I965_SURFACEFORMAT_R8_UNORM,
2994                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2995     }
2996     return;
2997 }
2998
/*
 * Dispatch the frame-level BRC (bit rate control) update kernel.
 *
 * The MbEnc curbe for the upcoming frame is programmed first, because the
 * BRC kernel consumes it (via curbe_brc_param/the surface setup) and
 * rewrites the MbEnc constants according to the current bitrate state.
 * The BRC kernel itself runs as a single MEDIA_OBJECT command.
 *
 * Returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* Dirty-ROI is deliberately forced off by the trailing "&& (0)". */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    /* Any of these features requires the MB-level constant/QP surfaces to
     * be bound when MbEnc later runs; the flags are baked into the curbe. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Select the MbEnc kernel variant matching the configured quality mode;
     * the +1/+2 below then picks the P/B flavor within that variant. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* set curbe mbenc*/
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    // gen95 set curbe out of the brc. gen9 do it here
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /*begin brc frame update*/
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /*brc copy ignored*/

    /* set curbe frame update*/
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* image state construct: build the PAK image-state commands into the
     * read buffer so the BRC kernel can patch them into the write buffer. */
    if (IS_GEN8(i965->intel.device_info)) {
        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    } else {
        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    }
    /* set surface frame mbenc*/
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* Frame-level update is a single-thread kernel: one MEDIA_OBJECT with
     * zeroed inline data, no walker. */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
3123
3124 static void
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126                                  struct encode_state *encode_state,
3127                                  struct i965_gpe_context *gpe_context,
3128                                  struct intel_encoder_context *encoder_context,
3129                                  void * param)
3130 {
3131     gen9_avc_mb_brc_curbe_data *cmd;
3132     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3134
3135     cmd = i965_gpe_context_map_curbe(gpe_context);
3136
3137     if (!cmd)
3138         return;
3139
3140     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3141
3142     cmd->dw0.cur_frame_type = generic_state->frame_type;
3143     if (generic_state->brc_roi_enable) {
3144         cmd->dw0.enable_roi = 1;
3145     } else {
3146         cmd->dw0.enable_roi = 0;
3147     }
3148
3149     i965_gpe_context_unmap_curbe(gpe_context);
3150
3151     return;
3152 }
3153
/*
 * Bind the surfaces consumed/produced by the MB-level BRC update kernel.
 *
 * Inputs: BRC history buffer, per-MB status buffer, and (when BRC ROI is
 * enabled) the ROI surface.  Output: the per-MB QP surface that MbEnc
 * later reads, bound only when MB-level BRC is enabled.
 */
static void
gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param_mbenc)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    /* brc history buffer*/
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_brc_history_buffer,
                                0,
                                avc_ctx->res_brc_history_buffer.size,
                                0,
                                GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);

    /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
    if (generic_state->mb_brc_enabled) {
        i965_add_buffer_2d_gpe_surface(ctx,
                                       gpe_context,
                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);

    }

    /* BRC roi feature*/
    if (generic_state->brc_roi_enable) {
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    &avc_ctx->res_mbbrc_roi_surface,
                                    0,
                                    avc_ctx->res_mbbrc_roi_surface.size,
                                    0,
                                    GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);

    }

    /* MB statistical data surface*/
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &avc_ctx->res_mb_status_buffer,
                                0,
                                avc_ctx->res_mb_status_buffer.size,
                                0,
                                GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);

    return;
}
3208
/*
 * Dispatch the MB-level BRC update kernel over the whole frame using a
 * media-object walker (no inter-thread dependency).
 *
 * Returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    int media_function = 0;
    int kernel_idx = 0;

    media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);

    /* set curbe brc mb update*/
    generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);


    /* set surface brc mb update*/
    generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);


    gpe->setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    /* the scaling is based on 8x8 blk level: each walker thread covers a
     * 2x2 group of MBs, hence the rounded-up halving of both dimensions */
    kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
    kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
    kernel_walker_param.no_dependency = 1;

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
3260
3261 /*
3262 mbenc kernel related function,it include intra dist kernel
3263 */
3264 static int
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3266 {
3267     int biweight = 32;      // default value
3268
3269     /* based on kernel HLD*/
3270     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3271         biweight = 32;
3272     } else {
3273         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3274
3275         if (biweight != 16 && biweight != 21 &&
3276             biweight != 32 && biweight != 43 && biweight != 48) {
3277             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
3278         }
3279     }
3280
3281     return biweight;
3282 }
3283
3284 static void
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286                                struct encode_state *encode_state,
3287                                struct intel_encoder_context *encoder_context)
3288 {
3289     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3293
3294     int max_num_references;
3295     VAPictureH264 *curr_pic;
3296     VAPictureH264 *ref_pic_l0;
3297     VAPictureH264 *ref_pic_l1;
3298     int i = 0;
3299     int tb = 0;
3300     int td = 0;
3301     int tx = 0;
3302     int tmp = 0;
3303     int poc0 = 0;
3304     int poc1 = 0;
3305
3306     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3307
3308     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309     curr_pic = &pic_param->CurrPic;
3310     for (i = 0; i < max_num_references; i++) {
3311         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3312
3313         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3315             break;
3316         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3317         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3318             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3319             break;
3320
3321         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323         CLIP(poc0, -128, 127);
3324         CLIP(poc1, -128, 127);
3325         tb = poc0;
3326         td = poc1;
3327
3328         if (td == 0) {
3329             td = 1;
3330         }
3331         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332         tx = (16384 + tmp) / td ;
3333         tmp = (tb * tx + 32) >> 6;
3334         CLIP(tmp, -1024, 1023);
3335         avc_state->dist_scale_factor_list0[i] = tmp;
3336     }
3337     return;
3338 }
3339
3340 static unsigned int
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342                               VAEncSliceParameterBufferH264 *slice_param,
3343                               int list,
3344                               int ref_frame_idx)
3345 {
3346     struct i965_driver_data *i965 = i965_driver_data(ctx);
3347     struct object_surface *obj_surface;
3348     struct gen9_surface_avc *avc_priv_surface;
3349     VASurfaceID surface_id;
3350
3351     assert(slice_param);
3352     assert(list < 2);
3353
3354     if (list == 0) {
3355         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3357         else
3358             return 0;
3359     } else {
3360         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3362         else
3363             return 0;
3364     }
3365     obj_surface = SURFACE(surface_id);
3366     if (obj_surface && obj_surface->private_data) {
3367         avc_priv_surface = obj_surface->private_data;
3368         return avc_priv_surface->qp_value;
3369     } else {
3370         return 0;
3371     }
3372 }
3373
/*
 * Populate the MB-BRC constant data surface for the current frame type.
 *
 * Layout: 52 entries (one per QP, AVC_QP_MAX) of 16 dwords each, seeded
 * from gen9_avc_mb_brc_const_data[] and then patched per feature flags.
 * Dword offsets patched below (relative to each QP entry):
 *   0  - old intra mode cost (I frames, old_mode_cost_enable)
 *   3  - MV cost for P skip (skip_bias_adjustment_enable)
 *   6,7- FTQ skip thresholds replicated across byte lanes
 *   9  - non-FTQ skip value (LUT input or per-slice-type table)
 *   10 - intra scaling factor (adaptive or fixed)
 *   11,12 - trellis lambda values (kernel_trellis_enable)
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;        /* 16 dwords per QP, 52 QP values */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    table_idx = slice_type_kernel[generic_state->frame_type];

    /* seed the whole surface from the per-slice-type default table */
    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    data_tmp = data;

    /* first pass: slice-type specific patches, advancing 16 dwords per QP */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* skip value: app-provided LUT wins, else the built-in table */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* second pass (all slice types): FTQ thresholds and trellis lambdas */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            /* replicate the 8-bit threshold into the byte lanes the kernel
             * reads (dw6: bytes 0,2,3; dw7: bytes 0-3) */
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        if (avc_state->kernel_trellis_enable) {
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3463
3464 static void
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466                          struct encode_state *encode_state,
3467                          struct i965_gpe_context *gpe_context,
3468                          struct intel_encoder_context *encoder_context,
3469                          void * param)
3470 {
3471     struct i965_driver_data *i965 = i965_driver_data(ctx);
3472     union {
3473         gen9_avc_mbenc_curbe_data *g9;
3474         gen95_avc_mbenc_curbe_data *g95;
3475     } cmd;
3476     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3479
3480     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3482     VASurfaceID surface_id;
3483     struct object_surface *obj_surface;
3484
3485     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486     unsigned char qp = 0;
3487     unsigned char me_method = 0;
3488     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489     unsigned int table_idx = 0;
3490     unsigned char is_g9 = 0;
3491     unsigned char is_g95 = 0;
3492     unsigned int curbe_size = 0;
3493
3494     unsigned int preset = generic_state->preset;
3495     if (IS_SKL(i965->intel.device_info) ||
3496         IS_BXT(i965->intel.device_info)) {
3497         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3498         if (!cmd.g9)
3499             return;
3500         is_g9 = 1;
3501         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502         memset(cmd.g9, 0, curbe_size);
3503
3504         if (mbenc_i_frame_dist_in_use) {
3505             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3506
3507         } else {
3508             switch (generic_state->frame_type) {
3509             case SLICE_TYPE_I:
3510                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3511                 break;
3512             case SLICE_TYPE_P:
3513                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3514                 break;
3515             case SLICE_TYPE_B:
3516                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3517                 break;
3518             default:
3519                 assert(0);
3520             }
3521
3522         }
3523     } else if (IS_KBL(i965->intel.device_info) ||
3524                IS_GEN10(i965->intel.device_info) ||
3525                IS_GLK(i965->intel.device_info)) {
3526         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3527         if (!cmd.g95)
3528             return;
3529         is_g95 = 1;
3530         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531         memset(cmd.g9, 0, curbe_size);
3532
3533         if (mbenc_i_frame_dist_in_use) {
3534             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3535
3536         } else {
3537             switch (generic_state->frame_type) {
3538             case SLICE_TYPE_I:
3539                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3540                 break;
3541             case SLICE_TYPE_P:
3542                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3543                 break;
3544             case SLICE_TYPE_B:
3545                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3546                 break;
3547             default:
3548                 assert(0);
3549             }
3550
3551         }
3552     } else {
3553         /* Never get here, just silence a gcc warning */
3554         assert(0);
3555
3556         return;
3557     }
3558
3559     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3561
3562     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3566
3567     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568     cmd.g9->dw38.max_len_sp = 0;
3569
3570     if (is_g95)
3571         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3572
3573     cmd.g9->dw3.src_access = 0;
3574     cmd.g9->dw3.ref_access = 0;
3575
3576     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577         //disable ftq_override by now.
3578         if (avc_state->ftq_override) {
3579             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3580
3581         } else {
3582             // both gen9 and gen95 come here by now
3583             if (generic_state->frame_type == SLICE_TYPE_P) {
3584                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3585
3586             } else {
3587                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3588             }
3589         }
3590     } else {
3591         cmd.g9->dw3.ftq_enable = 0;
3592     }
3593
3594     if (avc_state->disable_sub_mb_partion)
3595         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3596
3597     if (mbenc_i_frame_dist_in_use) {
3598         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601         cmd.g9->dw6.batch_buffer_end = 0;
3602         cmd.g9->dw31.intra_compute_type = 1;
3603
3604     } else {
3605         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3608
3609         {
3610             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612             } else if (avc_state->skip_bias_adjustment_enable) {
3613                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614                 // No need to check for P picture as the flag is only enabled for P picture */
3615                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3616
3617             }
3618         }
3619
3620         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3622     }
3623     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3631
3632
3633     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634     cmd.g9->dw7.src_field_polarity = 0;//field related
3635
3636     /*ftq_skip_threshold_lut set,dw14 /15*/
3637
3638     /*r5 disable NonFTQSkipThresholdLUT*/
3639     if (generic_state->frame_type == SLICE_TYPE_P) {
3640         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3641
3642     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3644
3645     }
3646
3647     cmd.g9->dw13.qp_prime_y = qp;
3648     cmd.g9->dw13.qp_prime_cb = qp;
3649     cmd.g9->dw13.qp_prime_cr = qp;
3650     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3651
3652     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653         switch (gen9_avc_multi_pred[preset]) {
3654         case 0:
3655             cmd.g9->dw32.mult_pred_l0_disable = 128;
3656             cmd.g9->dw32.mult_pred_l1_disable = 128;
3657             break;
3658         case 1:
3659             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660             cmd.g9->dw32.mult_pred_l1_disable = 128;
3661             break;
3662         case 2:
3663             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3665             break;
3666         case 3:
3667             cmd.g9->dw32.mult_pred_l0_disable = 1;
3668             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3669             break;
3670
3671         }
3672
3673     } else {
3674         cmd.g9->dw32.mult_pred_l0_disable = 128;
3675         cmd.g9->dw32.mult_pred_l1_disable = 128;
3676     }
3677
3678     /*field setting for dw33 34, ignored*/
3679
3680     if (avc_state->adaptive_transform_decision_enable) {
3681         if (generic_state->frame_type != SLICE_TYPE_I) {
3682             if (is_g9) {
3683                 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684                 cmd.g9->dw58.mb_texture_threshold = 1024;
3685                 cmd.g9->dw58.tx_decision_threshold = 128;
3686             } else if (is_g95) {
3687                 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688                 cmd.g95->dw60.mb_texture_threshold = 1024;
3689                 cmd.g95->dw60.tx_decision_threshold = 128;
3690             }
3691         }
3692     }
3693
3694
3695     if (generic_state->frame_type == SLICE_TYPE_B) {
3696         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3699     }
3700
3701     cmd.g9->dw34.b_original_bff = 0; //frame only
3702     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3707     if (is_g95) {
3708         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3710     }
3711
3712     if (is_g9) {
3713         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3714
3715         if (cmd.g9->dw34.force_non_skip_check) {
3716             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3717         }
3718     }
3719
3720
3721     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722     cmd.g9->dw38.ref_threshold = 400;
3723     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3724
3725     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3726        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3729
3730     if (mbenc_i_frame_dist_in_use) {
3731         cmd.g9->dw13.qp_prime_y = 0;
3732         cmd.g9->dw13.qp_prime_cb = 0;
3733         cmd.g9->dw13.qp_prime_cr = 0;
3734         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3737
3738     }
3739     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3750     }
3751
3752     table_idx = slice_type_kernel[generic_state->frame_type];
3753     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3754
3755     if (generic_state->frame_type == SLICE_TYPE_I) {
3756         cmd.g9->dw0.skip_mode_enable = 0;
3757         cmd.g9->dw37.skip_mode_enable = 0;
3758         cmd.g9->dw36.hme_combine_overlap = 0;
3759         cmd.g9->dw47.intra_cost_sf = 16;
3760         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3761         if (is_g9)
3762             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3763
3764     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766         cmd.g9->dw3.bme_disable_fbr = 1;
3767         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769         cmd.g9->dw7.non_skip_zmv_added = 1;
3770         cmd.g9->dw7.non_skip_mode_added = 1;
3771         cmd.g9->dw7.skip_center_mask = 1;
3772         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774         cmd.g9->dw36.hme_combine_overlap = 1;
3775         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3782
3783     } else {
3784         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786         cmd.g9->dw3.search_ctrl = 7;
3787         cmd.g9->dw3.skip_type = 1;
3788         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790         cmd.g9->dw7.skip_center_mask = 0xff;
3791         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793         cmd.g9->dw36.hme_combine_overlap = 1;
3794         surface_id = slice_param->RefPicList1[0].picture_id;
3795         obj_surface = SURFACE(surface_id);
3796         if (!obj_surface) {
3797             WARN_ONCE("Invalid backward reference frame\n");
3798             return;
3799         }
3800         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3801
3802         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3814
3815         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817             cmd.g9->dw7.non_skip_zmv_added = 1;
3818             cmd.g9->dw7.non_skip_mode_added = 1;
3819         }
3820
3821         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3824
3825     }
3826
3827     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3828
3829     if (avc_state->rolling_intra_refresh_enable) {
3830         /*by now disable it*/
3831         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832         cmd.g9->dw32.mult_pred_l0_disable = 128;
3833         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834          across one P frame to another P frame, as needed by the RollingI algo */
3835         if (is_g9) {
3836             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3839         }
3840
3841         if (is_g95) {
3842             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843                 cmd.g95->dw4.enable_intra_refresh = 0;
3844                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3847             } else {
3848                 cmd.g95->dw4.enable_intra_refresh = 1;
3849                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3854             }
3855         }
3856
3857     } else {
3858         cmd.g9->dw34.widi_intra_refresh_en = 0;
3859     }
3860
3861     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3862     if (is_g9)
3863         cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3864     else if (is_g95)
3865         cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3866
3867     /*roi set disable by now. 49-56*/
3868     if (curbe_param->roi_enabled) {
3869         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3870         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3871         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3872         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3873
3874         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3875         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3876         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3877         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3878
3879         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3880         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3881         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3882         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3883
3884         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3885         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3886         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3887         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3888
3889         if (!generic_state->brc_enabled) {
3890             char tmp = 0;
3891             tmp = generic_state->roi[0].value;
3892             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894             tmp = generic_state->roi[1].value;
3895             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897             tmp = generic_state->roi[2].value;
3898             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900             tmp = generic_state->roi[3].value;
3901             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3903         } else {
3904             cmd.g9->dw34.roi_enable_flag = 0;
3905         }
3906     }
3907
3908     if (is_g95) {
3909         if (avc_state->tq_enable) {
3910             if (generic_state->frame_type == SLICE_TYPE_I) {
3911                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3913
3914             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3917
3918             } else {
3919                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3921             }
3922
3923             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3925
3926             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3928
3929             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3931
3932             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3934         }
3935     }
3936
3937     if (is_g95) {
3938         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3960     }
3961
3962     if (is_g9) {
3963         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3985     }
3986
3987     i965_gpe_context_unmap_curbe(gpe_context);
3988
3989     return;
3990 }
3991
3992 static void
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994                              struct encode_state *encode_state,
3995                              struct i965_gpe_context *gpe_context,
3996                              struct intel_encoder_context *encoder_context,
3997                              void * param)
3998 {
3999     struct i965_driver_data *i965 = i965_driver_data(ctx);
4000     gen9_avc_fei_mbenc_curbe_data *cmd;
4001     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004     VASurfaceID surface_id;
4005     struct object_surface *obj_surface;
4006     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
4008     VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4009
4010     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011     unsigned char qp = 0;
4012     unsigned char me_method = 0;
4013     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014     unsigned int table_idx = 0;
4015     int ref_width, ref_height, len_sp;
4016     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018     unsigned int preset = generic_state->preset;
4019
4020     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4021
4022     assert(gpe_context != NULL);
4023     cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024     memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
4025
4026     if (mbenc_i_frame_dist_in_use) {
4027         memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4028
4029     } else {
4030         switch (generic_state->frame_type) {
4031         case SLICE_TYPE_I:
4032             memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4033             break;
4034         case SLICE_TYPE_P:
4035             memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4036             break;
4037         case SLICE_TYPE_B:
4038             memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4039             break;
4040         default:
4041             assert(0);
4042         }
4043
4044     }
4045     /* 4 means full search, 6 means diamand search */
4046     me_method  = (fei_param->search_window == 5) ||
4047                  (fei_param->search_window == 8) ? 4 : 6;
4048
4049     ref_width    = fei_param->ref_width;
4050     ref_height   = fei_param->ref_height;
4051     len_sp       = fei_param->len_sp;
4052     /* If there is a serch_window, discard user provided ref_width, ref_height
4053      * and search_path length */
4054     switch (fei_param->search_window) {
4055     case 0:
4056         /*  not use predefined search window, there should be a search_path input */
4057         if ((fei_param->search_path != 0) &&
4058             (fei_param->search_path != 1) &&
4059             (fei_param->search_path != 2)) {
4060             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
4061             assert(0);
4062         }
4063         /* 4 means full search, 6 means diamand search */
4064         me_method = (fei_param->search_path == 1) ? 6 : 4;
4065         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066             WARN_ONCE("Invalid input ref_width/ref_height in"
4067                       "SearchWindow=0 case! \n");
4068             assert(0);
4069         }
4070         break;
4071
4072     case 1:
4073         /* Tiny - 4 SUs 24x24 window */
4074         ref_width  = 24;
4075         ref_height = 24;
4076         len_sp     = 4;
4077         break;
4078
4079     case 2:
4080         /* Small - 9 SUs 28x28 window */
4081         ref_width  = 28;
4082         ref_height = 28;
4083         len_sp     = 9;
4084         break;
4085     case 3:
4086         /* Diamond - 16 SUs 48x40 window */
4087         ref_width  = 48;
4088         ref_height = 40;
4089         len_sp     = 16;
4090         break;
4091     case 4:
4092         /* Large Diamond - 32 SUs 48x40 window */
4093         ref_width  = 48;
4094         ref_height = 40;
4095         len_sp     = 32;
4096         break;
4097     case 5:
4098         /* Exhaustive - 48 SUs 48x40 window */
4099         ref_width  = 48;
4100         ref_height = 40;
4101         len_sp     = 48;
4102         break;
4103     case 6:
4104         /* Diamond - 16 SUs 64x32 window */
4105         ref_width  = 64;
4106         ref_height = 32;
4107         len_sp     = 16;
4108         break;
4109     case 7:
4110         /* Large Diamond - 32 SUs 64x32 window */
4111         ref_width  = 64;
4112         ref_height = 32;
4113         len_sp     = 32;
4114         break;
4115     case 8:
4116         /* Exhaustive - 48 SUs 64x32 window */
4117         ref_width  = 64;
4118         ref_height = 32;
4119         len_sp     = 48;
4120         break;
4121
4122     default:
4123         assert(0);
4124     }
4125
4126     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4127     if (is_bframe) {
4128         CLIP(ref_width, 4, 32);
4129         CLIP(ref_height, 4, 32);
4130     } else if (is_pframe) {
4131         CLIP(ref_width, 4, 64);
4132         CLIP(ref_height, 4, 32);
4133     }
4134
4135     cmd->dw0.adaptive_enable =
4136         cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137     cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138                                           = avc_state->transform_8x8_mode_enable;
4139     cmd->dw2.max_len_sp = len_sp;
4140     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142     cmd->dw3.src_access =
4143         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
4144
4145     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146         if (avc_state->ftq_override) {
4147             cmd->dw3.ft_enable = avc_state->ftq_enable;
4148         } else {
4149             if (generic_state->frame_type == SLICE_TYPE_P) {
4150                 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4151             } else {
4152                 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4153             }
4154         }
4155     } else {
4156         cmd->dw3.ft_enable = 0;
4157     }
4158
4159     if (avc_state->disable_sub_mb_partion)
4160         cmd->dw3.sub_mb_part_mask = 0x7;
4161
4162     if (mbenc_i_frame_dist_in_use) {
4163         /* Fixme: Not supported, no brc in fei */
4164         assert(0);
4165         cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166         cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168         cmd->dw6.batch_buffer_end = 0;
4169         cmd->dw31.intra_compute_type = 1;
4170     }
4171
4172     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173     cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174     cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175                                generic_state->frame_height_in_mbs : avc_state->slice_height;
4176     cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177     cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178     cmd->dw3.inter_sad = fei_param->inter_sad;
4179     cmd->dw3.Intra_sad = fei_param->intra_sad;
4180     cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181     cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182     cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183         avc_state->sfd_enable && generic_state->hme_enabled;
4184     cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185     cmd->dw4.constrained_intra_pred_flag =
4186         pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187     cmd->dw4.hme_enable = 0;
4188     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189     cmd->dw4.use_actual_ref_qp_value =
4190         generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191     cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192     cmd->dw7.src_field_polarity = 0;
4193
4194     /* mv mode cost */
4195     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197         // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198     } else if (avc_state->skip_bias_adjustment_enable) {
4199         // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200         // No need to check for P picture as the flag is only enabled for P picture
4201         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4202     }
4203
4204     //dw16
4205     /* search path tables */
4206     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4208
4209     //ftq_skip_threshold_lut set,dw14 /15
4210
4211     //r5 disable NonFTQSkipThresholdLUT
4212     if (generic_state->frame_type == SLICE_TYPE_P) {
4213         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215         cmd->dw32.skip_val =
4216             gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4217     }
4218     cmd->dw13.qp_prime_y = qp;
4219     cmd->dw13.qp_prime_cb = qp;
4220     cmd->dw13.qp_prime_cr = qp;
4221     cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
4222
4223     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224         cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225         cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4226     } else {
4227         /* disable */
4228         cmd->dw32.mult_pred_l0_disable = 0x80;
4229         cmd->dw32.mult_pred_l1_disable = 0x80;
4230     }
4231     /* no field pic setting, not supported */
4232
4233     //dw34 58
4234     if (avc_state->adaptive_transform_decision_enable) {
4235         if (generic_state->frame_type != SLICE_TYPE_I) {
4236             cmd->dw34.enable_adaptive_tx_decision = 1;
4237         }
4238
4239         cmd->dw58.mb_texture_threshold = 1024;
4240         cmd->dw58.tx_decision_threshold = 128;
4241     }
4242     if (generic_state->frame_type == SLICE_TYPE_B) {
4243         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244         cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4246     }
4247     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248     cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4253
4254     if (cmd->dw34.force_non_skip_check) {
4255         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4256     }
4257     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258     cmd->dw38.ref_threshold = 400;
4259     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260     // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
4261     // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262     // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4264     if (mbenc_i_frame_dist_in_use) {
4265         cmd->dw13.qp_prime_y = 0;
4266         cmd->dw13.qp_prime_cb = 0;
4267         cmd->dw13.qp_prime_cr = 0;
4268         cmd->dw33.intra_16x16_nondc_penalty = 0;
4269         cmd->dw33.intra_8x8_nondc_penalty = 0;
4270         cmd->dw33.intra_4x4_nondc_penalty = 0;
4271     }
4272     if (cmd->dw4.use_actual_ref_qp_value) {
4273         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4283     }
4284
4285     table_idx = slice_type_kernel[generic_state->frame_type];
4286     cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
4287
4288     if (generic_state->frame_type == SLICE_TYPE_I) {
4289         cmd->dw0.skip_mode_enable = 0;
4290         cmd->dw37.skip_mode_enable = 0;
4291         cmd->dw36.hme_combine_overlap = 0;
4292         cmd->dw36.check_all_fractional_enable = 0;
4293         cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294         cmd->dw34.enable_direct_bias_adjustment = 0;
4295         cmd->dw34.enable_global_motion_bias_adjustment = 0;
4296
4297     } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299         cmd->dw3.bme_disable_fbr = 1;
4300         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302         cmd->dw7.non_skip_zmv_added = 1;
4303         cmd->dw7.non_skip_mode_added = 1;
4304         cmd->dw7.skip_center_mask = 1;
4305
4306         cmd->dw47.intra_cost_sf =
4307             (avc_state->adaptive_intra_scaling_enable) ?
4308             gen9_avc_adaptive_intra_scaling_factor[preset] :
4309             gen9_avc_intra_scaling_factor[preset];
4310
4311         cmd->dw47.max_vmv_r =
4312             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4313
4314         cmd->dw36.hme_combine_overlap = 1;
4315         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317         cmd->dw34.enable_direct_bias_adjustment = 0;
4318         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319         if (avc_state->global_motion_bias_adjustment_enable)
4320             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4321
4322         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4323
4324     } else { /* B slice */
4325
4326         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327         cmd->dw1.bi_Weight = avc_state->bi_weight;
4328         cmd->dw3.search_ctrl = 7;
4329         cmd->dw3.skip_type = 1;
4330         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332         cmd->dw7.skip_center_mask = 0xff;
4333
4334         cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335                                   gen9_avc_adaptive_intra_scaling_factor[qp] :
4336                                   gen9_avc_intra_scaling_factor[qp];
4337
4338         cmd->dw47.max_vmv_r =
4339             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4340
4341         cmd->dw36.hme_combine_overlap = 1;
4342
4343         //check is_fwd_frame_short_term_ref
4344         surface_id = slice_param->RefPicList1[0].picture_id;
4345         obj_surface = SURFACE(surface_id);
4346         if (!obj_surface) {
4347             WARN_ONCE("Invalid backward reference frame\n");
4348             if (gpe_context)
4349                 i965_gpe_context_unmap_curbe(gpe_context);
4350             return;
4351         }
4352         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4353
4354         cmd->dw36.num_ref_idx_l0_minus_one =
4355             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4356             : 0;
4357         cmd->dw36.num_ref_idx_l1_minus_one =
4358             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4359             : 0;
4360         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4361
4362         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4370
4371         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372         if (cmd->dw34.enable_direct_bias_adjustment) {
4373             cmd->dw7.non_skip_mode_added = 1;
4374             cmd->dw7.non_skip_zmv_added = 1;
4375         }
4376
4377         cmd->dw34.enable_global_motion_bias_adjustment =
4378             avc_state->global_motion_bias_adjustment_enable;
4379         if (avc_state->global_motion_bias_adjustment_enable)
4380             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4381
4382         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383         cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4384     }
4385
4386     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4387
4388     if (avc_state->rolling_intra_refresh_enable) {
4389         //Not supported
4390         cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4391
4392     } else {
4393         cmd->dw34.widi_intra_refresh_en = 0;
4394     }
4395     cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396     cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4397
4398     /* Fixme: Skipped ROI stuffs for now */
4399
4400     /* r64: FEI specific fields */
4401     cmd->dw64.fei_enable = 1;
4402     cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403     if (fei_param->distortion != VA_INVALID_ID)
4404         cmd->dw64.vme_distortion_output_enable = 1;
4405     cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406     cmd->dw64.mb_input_enable = fei_param->mb_input;
4407
4408     // FEI mode is disabled when external MVP is available
4409     if (fei_param->mv_predictor_enable)
4410         cmd->dw64.fei_mode = 0;
4411     else
4412         cmd->dw64.fei_mode = 1;
4413
4414     cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415     cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416     cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417     cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418     cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419     cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420     cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421     cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422     cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423     cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424     cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425     cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426     cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427     cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428     cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429     cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430     cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431     cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432     cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433     cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434     cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435     cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436     cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437     i965_gpe_context_unmap_curbe(gpe_context);
4438
4439     return;
4440 }
4441
/*
 * Bind every input/output surface the MbEnc (macroblock encoding) GPU kernel
 * reads or writes, at the binding-table slots the kernel expects
 * (GEN9_AVC_MBENC_*_INDEX).  The attachment order/slot numbers must match the
 * kernel's binding table, so the sequence of i965_add_*_gpe_surface() calls
 * below is significant.
 *
 * param_mbenc is a struct mbenc_param describing which optional surfaces are
 * in use for this pass (I-frame distortion mode, MB BRC constants, MB QP,
 * MAD, VP stats, ...).  Returns early (binding nothing further) if a required
 * reconstructed/scaled surface is missing.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* A few binding-table slots differ between the Gen9 (SKL/BXT) and the
     * Gen9.5/Gen10 (KBL/GLK/CNL) MbEnc kernels; select the right set here. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /* PAK object command buffer output (16 DWs per MB) */
    size = frame_mb_size * 16 * 4;
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /* MV data buffer output (32 DWs per MB) */
    size = frame_mb_size * 32 * 4;
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /* Current input YUV surface: in I-frame-distortion mode the kernel works
     * on the 4x-downscaled reconstructed picture; otherwise on the full-size
     * input picture.  Y and UV planes are bound as separate 2D surfaces. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /* 4x HME MV input; on Gen8 the kernel only consumes it for non-I
         * frames, so the binding is skipped for I frames there. */
        if (!IS_GEN8(i965->intel.device_info)) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* 4x HME distortion input */
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        } else if (generic_state->frame_type != SLICE_TYPE_I) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* 4x HME distortion input */
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        }
    }

    /* MB BRC constant data buffer (16 DWs per QP level) */
    if (param->mb_const_data_buffer_in_use) {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /* Per-MB QP data buffer: application-provided MB QP map when enabled,
     * otherwise the MB-BRC-generated QP surface. */
    if (param->mb_qp_buffer_in_use) {
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /* Current picture for VME inter prediction, list 0 slot 0
     * (same source-selection rule as above). */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /* List 0 reference YUV surfaces, interleaved at odd slots after the
     * current picture. Stops at the first invalid reference. */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /* Current picture again for the list 1 VME slot. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    /* List 1 references; additionally the first backward reference's MB code
     * and MV data are bound as inputs for B-frame direct/skip derivation. */
    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            avc_priv_surface = obj_surface->private_data;
            /* Forward frame MB code input (16 DWs per MB) */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /* Forward frame MV data input (32 DWs per MB) */
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer, output of the I-frame distortion pass */
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* as ref frame ,update later RefPicSelect of Current Picture*/
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }
    /* MB stats slot: either the VPP-produced MB status buffer or, when only
     * flatness check is on, the flatness-check surface (same binding index). */
    if (!IS_GEN8(i965->intel.device_info)) {
        if (param->mb_vproc_stats_enable) {
            /* MB status buffer input (16 DWs per MB) */
            size = frame_mb_size * 16 * 4;
            gpe_resource = &(avc_ctx->res_mb_status_buffer);
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_MB_STATS_INDEX);

        } else if (avc_state->flatness_check_enable) {
            gpe_resource = &(avc_ctx->res_flatness_check_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MB_STATS_INDEX);
        }
    } else if (avc_state->flatness_check_enable) {
        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    /* MAD (mean absolute difference) accumulator: zeroed before each pass */
    if (param->mad_enable) {
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /* arbitrary number of MBs per slice */
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /* slice map surface input */
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* Surfaces only bound for the normal encoding pass (not the
     * I-frame-distortion pass): force-non-skip MB map and the static frame
     * detection (SFD) cost table / output. */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }
        if (IS_GEN8(i965->intel.device_info)) {
            if (avc_state->sfd_enable) {
                /* NOTE(review): 128 / sizeof(unsigned long) then /4 gives a
                 * platform-dependent element count (differs between 32- and
                 * 64-bit builds) — confirm the intended SFD output size. */
                size = 128 / sizeof(unsigned long);
                gpe_resource = &(avc_ctx->res_sfd_output_buffer);
                i965_add_buffer_gpe_surface(ctx,
                                            gpe_context,
                                            gpe_resource,
                                            0,
                                            size / 4,
                                            0,
                                            GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);

            }
        } else {
            /* SFD cost table input for P/B frames when HME is active */
            if (avc_state->sfd_enable && generic_state->hme_enabled) {
                if (generic_state->frame_type == SLICE_TYPE_P) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
                } else if (generic_state->frame_type == SLICE_TYPE_B) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
                }
                if (generic_state->frame_type != SLICE_TYPE_I) {
                    size = 64;
                    i965_add_buffer_gpe_surface(ctx,
                                                gpe_context,
                                                gpe_resource,
                                                0,
                                                size / 4,
                                                0,
                                                (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));


                }
            }
        }
    }
    return;
}
4812
4813 static void
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815                                 struct encode_state *encode_state,
4816                                 struct i965_gpe_context *gpe_context,
4817                                 struct intel_encoder_context *encoder_context,
4818                                 void * param_mbenc)
4819 {
4820     struct i965_driver_data *i965 = i965_driver_data(ctx);
4821     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826     struct object_buffer *obj_buffer = NULL;
4827     struct buffer_store *buffer_store = NULL;
4828     struct object_surface *obj_surface = NULL;
4829     struct gen9_surface_avc *avc_priv_surface;
4830     struct i965_gpe_resource *gpe_resource;
4831     VASurfaceID surface_id;
4832     unsigned int size = 0;
4833     unsigned int frame_mb_nums;
4834     int i = 0, allocate_flag = 1;
4835
4836     obj_surface = encode_state->reconstructed_object;
4837     if (!obj_surface || !obj_surface->private_data)
4838         return;
4839     avc_priv_surface = obj_surface->private_data;
4840
4841     frame_mb_nums = generic_state->frame_width_in_mbs *
4842                     generic_state->frame_height_in_mbs;
4843     fei_param = avc_state->fei_framectl_param;
4844
4845     assert(fei_param != NULL);
4846
4847     /* res_mb_code_surface for MB code */
4848     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4849     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851     if (fei_param->mb_code_data != VA_INVALID_ID) {
4852         obj_buffer = BUFFER(fei_param->mb_code_data);
4853         assert(obj_buffer != NULL);
4854         buffer_store = obj_buffer->buffer_store;
4855         assert(size <= buffer_store->bo->size);
4856         i965_dri_object_to_buffer_gpe_resource(
4857             &avc_priv_surface->res_mb_code_surface,
4858             buffer_store->bo);
4859     } else {
4860         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861                                                    &avc_priv_surface->res_mb_code_surface,
4862                                                    ALIGN(size, 0x1000),
4863                                                    "mb code buffer");
4864         assert(allocate_flag != 0);
4865     }
4866
4867     /* res_mv_data_surface for MV data */
4868     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869     if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870         i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871     if (fei_param->mv_data != VA_INVALID_ID) {
4872         obj_buffer = BUFFER(fei_param->mv_data);
4873         assert(obj_buffer != NULL);
4874         buffer_store = obj_buffer->buffer_store;
4875         assert(size <= buffer_store->bo->size);
4876         i965_dri_object_to_buffer_gpe_resource(
4877             &avc_priv_surface->res_mv_data_surface,
4878             buffer_store->bo);
4879     } else {
4880         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881                                                    &avc_priv_surface->res_mv_data_surface,
4882                                                    ALIGN(size, 0x1000),
4883                                                    "mv data buffer");
4884         assert(allocate_flag != 0);
4885     }
4886
4887     /* fei mb control data surface */
4888     size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4889     if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890         assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891         obj_buffer = BUFFER(fei_param->mb_ctrl);
4892         assert(obj_buffer != NULL);
4893         buffer_store = obj_buffer->buffer_store;
4894         assert(size <= buffer_store->bo->size);
4895         if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897         i965_dri_object_to_buffer_gpe_resource(
4898             &avc_priv_surface->res_fei_mb_cntrl_surface,
4899             buffer_store->bo);
4900     }
4901
4902     /* fei mv predictor surface*/
4903     size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904     if (fei_param->mv_predictor_enable &&
4905         (fei_param->mv_predictor != VA_INVALID_ID)) {
4906         obj_buffer = BUFFER(fei_param->mv_predictor);
4907         assert(obj_buffer != NULL);
4908         buffer_store = obj_buffer->buffer_store;
4909         assert(size <= buffer_store->bo->size);
4910         if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911             i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912         i965_dri_object_to_buffer_gpe_resource(
4913             &avc_priv_surface->res_fei_mv_predictor_surface,
4914             buffer_store->bo);
4915     } else {
4916         if (fei_param->mv_predictor_enable)
4917             assert(fei_param->mv_predictor != VA_INVALID_ID);
4918     }
4919
4920     /* fei vme distortion */
4921     size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922     if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923         i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924     if (fei_param->distortion != VA_INVALID_ID) {
4925         obj_buffer = BUFFER(fei_param->distortion);
4926         assert(obj_buffer != NULL);
4927         buffer_store = obj_buffer->buffer_store;
4928         assert(size <= buffer_store->bo->size);
4929         i965_dri_object_to_buffer_gpe_resource(
4930             &avc_priv_surface->res_fei_vme_distortion_surface,
4931             buffer_store->bo);
4932     } else {
4933         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934                                                    &avc_priv_surface->res_fei_vme_distortion_surface,
4935                                                    ALIGN(size, 0x1000),
4936                                                    "fei vme distortion");
4937         assert(allocate_flag != 0);
4938     }
4939
4940     /* fei mb qp  */
4941     /* Fixme/Confirm:  not sure why we need 3 byte padding here */
4942     size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944         obj_buffer = BUFFER(fei_param->qp);
4945         assert(obj_buffer != NULL);
4946         buffer_store = obj_buffer->buffer_store;
4947         assert((size - 3) <= buffer_store->bo->size);
4948         if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950         i965_dri_object_to_buffer_gpe_resource(
4951             &avc_priv_surface->res_fei_mb_qp_surface,
4952             buffer_store->bo);
4953     } else {
4954         if (fei_param->mb_qp)
4955             assert(fei_param->qp != VA_INVALID_ID);
4956     }
4957
4958     /*==== pak obj command buffer output ====*/
4959     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961     i965_add_buffer_gpe_surface(ctx,
4962                                 gpe_context,
4963                                 gpe_resource,
4964                                 0,
4965                                 size / 4,
4966                                 0,
4967                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4968
4969
4970     /*=== mv data buffer output */
4971     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973     i965_add_buffer_gpe_surface(ctx,
4974                                 gpe_context,
4975                                 gpe_resource,
4976                                 0,
4977                                 size / 4,
4978                                 0,
4979                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4980
4981
4982     /* === current input Y (binding table offset = 3)=== */
4983     obj_surface = encode_state->input_yuv_object;
4984     i965_add_2d_gpe_surface(ctx,
4985                             gpe_context,
4986                             obj_surface,
4987                             0,
4988                             1,
4989                             I965_SURFACEFORMAT_R8_UNORM,
4990                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4991
4992     /* === current input UV === (binding table offset == 4)*/
4993     i965_add_2d_gpe_surface(ctx,
4994                             gpe_context,
4995                             obj_surface,
4996                             1,
4997                             1,
4998                             I965_SURFACEFORMAT_R16_UINT,
4999                             GEN9_AVC_MBENC_CURR_UV_INDEX);
5000
5001     /* === input current YUV surface, (binding table offset == 15) === */
5002     i965_add_adv_gpe_surface(ctx, gpe_context,
5003                              obj_surface,
5004                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5005
5006
5007     /*== input current  YUV surface, (binding table offset == 32)*/
5008     i965_add_adv_gpe_surface(ctx, gpe_context,
5009                              obj_surface,
5010                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5011
5012     /* list 0 references */
5013     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5014
5015         surface_id = slice_param->RefPicList0[i].picture_id;
5016         obj_surface = SURFACE(surface_id);
5017         if (!obj_surface || !obj_surface->private_data)
5018             break;
5019         i965_add_adv_gpe_surface(ctx, gpe_context,
5020                                  obj_surface,
5021                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5022     }
5023
5024
5025     /* list 1 references */
5026     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027         if (i > 0) break; // only  one ref supported here for B frame
5028         surface_id = slice_param->RefPicList1[i].picture_id;
5029         obj_surface = SURFACE(surface_id);
5030         if (!obj_surface || !obj_surface->private_data)
5031             break;
5032
5033         i965_add_adv_gpe_surface(ctx, gpe_context,
5034                                  obj_surface,
5035                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5036         if (i == 0) {
5037             avc_priv_surface = obj_surface->private_data;
5038             /* mb code of Backward reference frame */
5039             size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040             gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041             i965_add_buffer_gpe_surface(ctx,
5042                                         gpe_context,
5043                                         gpe_resource,
5044                                         0,
5045                                         size / 4,
5046                                         0,
5047                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5048
5049             /* mv data of backward ref frame */
5050             size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051             gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052             i965_add_buffer_gpe_surface(ctx,
5053                                         gpe_context,
5054                                         gpe_resource,
5055                                         0,
5056                                         size / 4,
5057                                         0,
5058                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5059
5060         }
5061         //again
5062         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063             i965_add_adv_gpe_surface(ctx, gpe_context,
5064                                      obj_surface,
5065                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5066         }
5067     }
5068
5069     /* as ref frame ,update later RefPicSelect of Current Picture*/
5070     obj_surface = encode_state->reconstructed_object;
5071     avc_priv_surface = obj_surface->private_data;
5072     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5075                                        gpe_resource,
5076                                        1,
5077                                        I965_SURFACEFORMAT_R8_UNORM,
5078                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5079
5080     }
5081
5082
5083     /* mb specific data, macroblock control parameters */
5084     if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085         (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086         size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087         gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088         i965_add_buffer_gpe_surface(ctx,
5089                                     gpe_context,
5090                                     gpe_resource,
5091                                     0,
5092                                     size / 4,
5093                                     0,
5094                                     GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5095     }
5096
5097     /* multi mv predictor surface */
5098     if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099         size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100         gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101         i965_add_buffer_gpe_surface(ctx,
5102                                     gpe_context,
5103                                     gpe_resource,
5104                                     0,
5105                                     size / 4,
5106                                     0,
5107                                     GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5108     }
5109
5110     /* mb qp */
5111     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112         size = frame_mb_nums  + 3;
5113         gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114         i965_add_buffer_gpe_surface(ctx,
5115                                     gpe_context,
5116                                     gpe_resource,
5117                                     0,
5118                                     size / 4,
5119                                     0,
5120                                     GEN9_AVC_MBENC_MBQP_INDEX);
5121     }
5122
5123
5124     /*=== FEI distortion surface ====*/
5125     size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126     gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127     i965_add_buffer_gpe_surface(ctx,
5128                                 gpe_context,
5129                                 gpe_resource,
5130                                 0,
5131                                 size / 4,
5132                                 0,
5133                                 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
5134
5135     return;
5136 }
5137
/*
 * Run one MBENC kernel pass (or, when i_frame_dist_in_use is set, the
 * BRC I-frame distortion kernel): select the kernel variant from the
 * kernel mode / frame type / FEI state, program its CURBE (unless BRC
 * update already did), bind surfaces, and dispatch a media-object
 * walker sized for the target resolution.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty ROI is deliberately disabled here via the trailing "&& (0)" */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* Any of these features needs the MB BRC constant data surface */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion variant: runs on the 4x-downscaled
         * picture with MAD and BRC disabled */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* pick the I-slice kernel for the configured quality/speed mode;
         * P/B offsets are added below */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* FEI uses its own kernel set, overriding the mode selection */
        if (encoder_context->fei_enabled) {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
        }

        /* kernels are laid out as consecutive I/P/B entries */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* skip context init and CURBE setup when the BRC update kernel has
     * already programmed the CURBE for this pass */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // caculate the lambda table, it is kernel controlled trellis quantization,gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker pattern must match the kernel's MB dependency:
         * 45 degrees when top-right MBs are needed, 26 otherwise */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
5308
5309 /*
5310 me kernle related function
5311 */
5312 static void
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314                       struct encode_state *encode_state,
5315                       struct i965_gpe_context *gpe_context,
5316                       struct intel_encoder_context *encoder_context,
5317                       void * param)
5318 {
5319     gen9_avc_me_curbe_data *curbe_cmd;
5320     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5323
5324     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5325
5326     struct me_param * curbe_param = (struct me_param *)param ;
5327     unsigned char  use_mv_from_prev_step = 0;
5328     unsigned char write_distortions = 0;
5329     unsigned char qp_prime_y = 0;
5330     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5331     unsigned char seach_table_idx = 0;
5332     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334     unsigned int scale_factor = 0;
5335
5336     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5337     switch (curbe_param->hme_type) {
5338     case INTEL_ENC_HME_4x : {
5339         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340         write_distortions = 1;
5341         mv_shift_factor = 2;
5342         scale_factor = 4;
5343         prev_mv_read_pos_factor = 0;
5344         break;
5345     }
5346     case INTEL_ENC_HME_16x : {
5347         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348         write_distortions = 0;
5349         mv_shift_factor = 2;
5350         scale_factor = 16;
5351         prev_mv_read_pos_factor = 1;
5352         break;
5353     }
5354     case INTEL_ENC_HME_32x : {
5355         use_mv_from_prev_step = 0;
5356         write_distortions = 0;
5357         mv_shift_factor = 1;
5358         scale_factor = 32;
5359         prev_mv_read_pos_factor = 0;
5360         break;
5361     }
5362     default:
5363         assert(0);
5364
5365     }
5366     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5367
5368     if (!curbe_cmd)
5369         return;
5370
5371     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5373
5374     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5375
5376     curbe_cmd->dw3.sub_pel_mode = 3;
5377     if (avc_state->field_scaling_output_interleaved) {
5378         /*frame set to zero,field specified*/
5379         curbe_cmd->dw3.src_access = 0;
5380         curbe_cmd->dw3.ref_access = 0;
5381         curbe_cmd->dw7.src_field_polarity = 0;
5382     }
5383     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5386
5387     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388     curbe_cmd->dw6.write_distortions = write_distortions;
5389     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5391
5392     if (generic_state->frame_type == SLICE_TYPE_B) {
5393         curbe_cmd->dw1.bi_weight = 32;
5394         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395         me_method = gen9_avc_b_me_method[generic_state->preset];
5396         seach_table_idx = 1;
5397     }
5398
5399     if (generic_state->frame_type == SLICE_TYPE_P ||
5400         generic_state->frame_type == SLICE_TYPE_B)
5401         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5402
5403     curbe_cmd->dw13.ref_streamin_cost = 5;
5404     curbe_cmd->dw13.roi_enable = 0;
5405
5406     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
5408
5409     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5410
5411     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5418
5419     i965_gpe_context_unmap_curbe(gpe_context);
5420     return;
5421 }
5422
5423 static void
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425                          struct encode_state *encode_state,
5426                          struct i965_gpe_context *gpe_context,
5427                          struct intel_encoder_context *encoder_context,
5428                          void * param)
5429 {
5430     struct i965_driver_data *i965 = i965_driver_data(ctx);
5431
5432     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5436
5437     struct object_surface *obj_surface, *input_surface;
5438     struct gen9_surface_avc *avc_priv_surface;
5439     struct i965_gpe_resource *gpe_resource;
5440     struct me_param * curbe_param = (struct me_param *)param ;
5441
5442     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443     VASurfaceID surface_id;
5444     int i = 0;
5445
5446     /* all scaled input surface stored in reconstructed_object*/
5447     obj_surface = encode_state->reconstructed_object;
5448     if (!obj_surface || !obj_surface->private_data)
5449         return;
5450     avc_priv_surface = obj_surface->private_data;
5451
5452
5453     switch (curbe_param->hme_type) {
5454     case INTEL_ENC_HME_4x : {
5455         /*memv output 4x*/
5456         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5458                                        gpe_resource,
5459                                        1,
5460                                        I965_SURFACEFORMAT_R8_UNORM,
5461                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5462
5463         /*memv input 16x*/
5464         if (generic_state->b16xme_enabled) {
5465             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5467                                            gpe_resource,
5468                                            1,
5469                                            I965_SURFACEFORMAT_R8_UNORM,
5470                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5471         }
5472         /* brc distortion  output*/
5473         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5475                                        gpe_resource,
5476                                        1,
5477                                        I965_SURFACEFORMAT_R8_UNORM,
5478                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479         /* memv distortion output*/
5480         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5482                                        gpe_resource,
5483                                        1,
5484                                        I965_SURFACEFORMAT_R8_UNORM,
5485                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486         /*input current down scaled YUV surface*/
5487         obj_surface = encode_state->reconstructed_object;
5488         avc_priv_surface = obj_surface->private_data;
5489         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490         i965_add_adv_gpe_surface(ctx, gpe_context,
5491                                  input_surface,
5492                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493         /*input ref scaled YUV surface*/
5494         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495             surface_id = slice_param->RefPicList0[i].picture_id;
5496             obj_surface = SURFACE(surface_id);
5497             if (!obj_surface || !obj_surface->private_data)
5498                 break;
5499             avc_priv_surface = obj_surface->private_data;
5500
5501             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5502
5503             i965_add_adv_gpe_surface(ctx, gpe_context,
5504                                      input_surface,
5505                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5506         }
5507
5508         obj_surface = encode_state->reconstructed_object;
5509         avc_priv_surface = obj_surface->private_data;
5510         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5511
5512         i965_add_adv_gpe_surface(ctx, gpe_context,
5513                                  input_surface,
5514                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5515
5516         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517             surface_id = slice_param->RefPicList1[i].picture_id;
5518             obj_surface = SURFACE(surface_id);
5519             if (!obj_surface || !obj_surface->private_data)
5520                 break;
5521             avc_priv_surface = obj_surface->private_data;
5522
5523             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5524
5525             i965_add_adv_gpe_surface(ctx, gpe_context,
5526                                      input_surface,
5527                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5528         }
5529         break;
5530
5531     }
5532     case INTEL_ENC_HME_16x : {
5533         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5535                                        gpe_resource,
5536                                        1,
5537                                        I965_SURFACEFORMAT_R8_UNORM,
5538                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5539
5540         if (generic_state->b32xme_enabled) {
5541             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5543                                            gpe_resource,
5544                                            1,
5545                                            I965_SURFACEFORMAT_R8_UNORM,
5546                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5547         }
5548
5549         obj_surface = encode_state->reconstructed_object;
5550         avc_priv_surface = obj_surface->private_data;
5551         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552         i965_add_adv_gpe_surface(ctx, gpe_context,
5553                                  input_surface,
5554                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5555
5556         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557             surface_id = slice_param->RefPicList0[i].picture_id;
5558             obj_surface = SURFACE(surface_id);
5559             if (!obj_surface || !obj_surface->private_data)
5560                 break;
5561             avc_priv_surface = obj_surface->private_data;
5562
5563             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5564
5565             i965_add_adv_gpe_surface(ctx, gpe_context,
5566                                      input_surface,
5567                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5568         }
5569
5570         obj_surface = encode_state->reconstructed_object;
5571         avc_priv_surface = obj_surface->private_data;
5572         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5573
5574         i965_add_adv_gpe_surface(ctx, gpe_context,
5575                                  input_surface,
5576                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5577
5578         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579             surface_id = slice_param->RefPicList1[i].picture_id;
5580             obj_surface = SURFACE(surface_id);
5581             if (!obj_surface || !obj_surface->private_data)
5582                 break;
5583             avc_priv_surface = obj_surface->private_data;
5584
5585             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5586
5587             i965_add_adv_gpe_surface(ctx, gpe_context,
5588                                      input_surface,
5589                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5590         }
5591         break;
5592     }
5593     case INTEL_ENC_HME_32x : {
5594         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5596                                        gpe_resource,
5597                                        1,
5598                                        I965_SURFACEFORMAT_R8_UNORM,
5599                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5600
5601         obj_surface = encode_state->reconstructed_object;
5602         avc_priv_surface = obj_surface->private_data;
5603         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604         i965_add_adv_gpe_surface(ctx, gpe_context,
5605                                  input_surface,
5606                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5607
5608         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609             surface_id = slice_param->RefPicList0[i].picture_id;
5610             obj_surface = SURFACE(surface_id);
5611             if (!obj_surface || !obj_surface->private_data)
5612                 break;
5613             avc_priv_surface = obj_surface->private_data;
5614
5615             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5616
5617             i965_add_adv_gpe_surface(ctx, gpe_context,
5618                                      input_surface,
5619                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5620         }
5621
5622         obj_surface = encode_state->reconstructed_object;
5623         avc_priv_surface = obj_surface->private_data;
5624         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5625
5626         i965_add_adv_gpe_surface(ctx, gpe_context,
5627                                  input_surface,
5628                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5629
5630         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631             surface_id = slice_param->RefPicList1[i].picture_id;
5632             obj_surface = SURFACE(surface_id);
5633             if (!obj_surface || !obj_surface->private_data)
5634                 break;
5635             avc_priv_surface = obj_surface->private_data;
5636
5637             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5638
5639             i965_add_adv_gpe_surface(ctx, gpe_context,
5640                                      input_surface,
5641                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5642         }
5643         break;
5644     }
5645     default:
5646         assert(0);
5647
5648     }
5649 }
5650
5651 static VAStatus
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653                    struct encode_state *encode_state,
5654                    struct intel_encoder_context *encoder_context,
5655                    int hme_type)
5656 {
5657     struct i965_driver_data *i965 = i965_driver_data(ctx);
5658     struct i965_gpe_table *gpe = &i965->gpe_table;
5659     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5663
5664     struct i965_gpe_context *gpe_context;
5665     struct gpe_media_object_walker_parameter media_object_walker_param;
5666     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668     int media_function = 0;
5669     int kernel_idx = 0;
5670     struct me_param param ;
5671     unsigned int scale_factor = 0;
5672
5673     switch (hme_type) {
5674     case INTEL_ENC_HME_4x : {
5675         media_function = INTEL_MEDIA_STATE_4X_ME;
5676         scale_factor = 4;
5677         break;
5678     }
5679     case INTEL_ENC_HME_16x : {
5680         media_function = INTEL_MEDIA_STATE_16X_ME;
5681         scale_factor = 16;
5682         break;
5683     }
5684     case INTEL_ENC_HME_32x : {
5685         media_function = INTEL_MEDIA_STATE_32X_ME;
5686         scale_factor = 32;
5687         break;
5688     }
5689     default:
5690         assert(0);
5691
5692     }
5693
5694     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5696
5697     /* I frame should not come here.*/
5698     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5700
5701     gpe->context_init(ctx, gpe_context);
5702     gpe->reset_binding_table(ctx, gpe_context);
5703
5704     /*set curbe*/
5705     memset(&param, 0, sizeof(param));
5706     param.hme_type = hme_type;
5707     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5708
5709     /*send surface*/
5710     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5711
5712     gpe->setup_interface_data(ctx, gpe_context);
5713
5714     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715     /* the scaling is based on 8x8 blk level */
5716     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5718     kernel_walker_param.no_dependency = 1;
5719
5720     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5721
5722     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5723                                             gpe_context,
5724                                             media_function,
5725                                             &media_object_walker_param);
5726
5727     return VA_STATUS_SUCCESS;
5728 }
5729
5730 /*
5731 wp related function
5732 */
5733 static void
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735                       struct encode_state *encode_state,
5736                       struct i965_gpe_context *gpe_context,
5737                       struct intel_encoder_context *encoder_context,
5738                       void * param)
5739 {
5740     gen9_avc_wp_curbe_data *cmd;
5741     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744     struct wp_param * curbe_param = (struct wp_param *)param;
5745
5746     cmd = i965_gpe_context_map_curbe(gpe_context);
5747
5748     if (!cmd)
5749         return;
5750     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5751     if (curbe_param->ref_list_idx) {
5752         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5754     } else {
5755         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5757     }
5758
5759     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5761
5762     i965_gpe_context_unmap_curbe(gpe_context);
5763
5764 }
5765
5766 static void
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768                          struct encode_state *encode_state,
5769                          struct i965_gpe_context *gpe_context,
5770                          struct intel_encoder_context *encoder_context,
5771                          void * param)
5772 {
5773     struct i965_driver_data *i965 = i965_driver_data(ctx);
5774     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777     struct wp_param * curbe_param = (struct wp_param *)param;
5778     struct object_surface *obj_surface;
5779     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780     VASurfaceID surface_id;
5781
5782     if (curbe_param->ref_list_idx) {
5783         surface_id = slice_param->RefPicList1[0].picture_id;
5784         obj_surface = SURFACE(surface_id);
5785         if (!obj_surface || !obj_surface->private_data)
5786             avc_state->weighted_ref_l1_enable = 0;
5787         else
5788             avc_state->weighted_ref_l1_enable = 1;
5789     } else {
5790         surface_id = slice_param->RefPicList0[0].picture_id;
5791         obj_surface = SURFACE(surface_id);
5792         if (!obj_surface || !obj_surface->private_data)
5793             avc_state->weighted_ref_l0_enable = 0;
5794         else
5795             avc_state->weighted_ref_l0_enable = 1;
5796     }
5797     if (!obj_surface)
5798         obj_surface = encode_state->reference_objects[0];
5799
5800
5801     i965_add_adv_gpe_surface(ctx, gpe_context,
5802                              obj_surface,
5803                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5804
5805     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806     i965_add_adv_gpe_surface(ctx, gpe_context,
5807                              obj_surface,
5808                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5809 }
5810
5811
5812 static VAStatus
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814                    struct encode_state *encode_state,
5815                    struct intel_encoder_context *encoder_context,
5816                    unsigned int list1_in_use)
5817 {
5818     struct i965_driver_data *i965 = i965_driver_data(ctx);
5819     struct i965_gpe_table *gpe = &i965->gpe_table;
5820     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5824
5825     struct i965_gpe_context *gpe_context;
5826     struct gpe_media_object_walker_parameter media_object_walker_param;
5827     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828     int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829     struct wp_param param;
5830
5831     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5832
5833     gpe->context_init(ctx, gpe_context);
5834     gpe->reset_binding_table(ctx, gpe_context);
5835
5836     memset(&param, 0, sizeof(param));
5837     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5838     /*set curbe*/
5839     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5840
5841     /*send surface*/
5842     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5843
5844     gpe->setup_interface_data(ctx, gpe_context);
5845
5846     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847     /* the scaling is based on 8x8 blk level */
5848     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850     kernel_walker_param.no_dependency = 1;
5851
5852     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5853
5854     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5855                                             gpe_context,
5856                                             media_function,
5857                                             &media_object_walker_param);
5858
5859     return VA_STATUS_SUCCESS;
5860 }
5861
5862
5863 /*
5864 sfd related function
5865 */
5866 static void
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868                        struct encode_state *encode_state,
5869                        struct i965_gpe_context *gpe_context,
5870                        struct intel_encoder_context *encoder_context,
5871                        void * param)
5872 {
5873     gen9_avc_sfd_curbe_data *cmd;
5874     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5878
5879     cmd = i965_gpe_context_map_curbe(gpe_context);
5880
5881     if (!cmd)
5882         return;
5883     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5884
5885     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887     cmd->dw0.stream_in_type = 7 ;
5888     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
5889     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890     cmd->dw0.vdenc_mode_disable = 1 ;
5891
5892     cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5895
5896     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
5898
5899     cmd->dw3.large_mv_threshold = 128 ;
5900     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901     cmd->dw5.zmv_threshold = 4 ;
5902     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903     cmd->dw7.min_dist_threshold = 10 ;
5904
5905     if (generic_state->frame_type == SLICE_TYPE_P) {
5906         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5907
5908     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5910     }
5911
5912     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
5914     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5919
5920     i965_gpe_context_unmap_curbe(gpe_context);
5921
5922 }
5923
5924 static void
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926                           struct encode_state *encode_state,
5927                           struct i965_gpe_context *gpe_context,
5928                           struct intel_encoder_context *encoder_context,
5929                           void * param)
5930 {
5931     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933     struct i965_gpe_resource *gpe_resource;
5934     int size = 0;
5935
5936     /*HME mv data surface memv output 4x*/
5937     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5939                                    gpe_resource,
5940                                    1,
5941                                    I965_SURFACEFORMAT_R8_UNORM,
5942                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5943
5944     /* memv distortion */
5945     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5947                                    gpe_resource,
5948                                    1,
5949                                    I965_SURFACEFORMAT_R8_UNORM,
5950                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5951     /*buffer output*/
5952     size = 32 * 4 * 4;
5953     gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954     i965_add_buffer_gpe_surface(ctx,
5955                                 gpe_context,
5956                                 gpe_resource,
5957                                 0,
5958                                 size / 4,
5959                                 0,
5960                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5961
5962 }
5963
5964 static VAStatus
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966                     struct encode_state *encode_state,
5967                     struct intel_encoder_context *encoder_context)
5968 {
5969     struct i965_driver_data *i965 = i965_driver_data(ctx);
5970     struct i965_gpe_table *gpe = &i965->gpe_table;
5971     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5974
5975     struct i965_gpe_context *gpe_context;
5976     struct gpe_media_object_parameter media_object_param;
5977     struct gpe_media_object_inline_data media_object_inline_data;
5978     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5980
5981     gpe->context_init(ctx, gpe_context);
5982     gpe->reset_binding_table(ctx, gpe_context);
5983
5984     /*set curbe*/
5985     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5986
5987     /*send surface*/
5988     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5989
5990     gpe->setup_interface_data(ctx, gpe_context);
5991
5992     memset(&media_object_param, 0, sizeof(media_object_param));
5993     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994     media_object_param.pinline_data = &media_object_inline_data;
5995     media_object_param.inline_size = sizeof(media_object_inline_data);
5996
5997     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5998                                      gpe_context,
5999                                      media_function,
6000                                      &media_object_param);
6001
6002     return VA_STATUS_SUCCESS;
6003 }
6004
6005 /**************** PreEnc Scaling *************************************/
6006 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6007  * function to set preenc scaling curbe is the same one using for avc encode
6008         == gen95_avc_set_curbe_scaling4x()
6009  * function to send buffer/surface resources is the same one using for avc encode
6010         == gen9_avc_send_surface_scaling()
6011  */
6012 static VAStatus
6013 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6014                                struct encode_state *encode_state,
6015                                struct intel_encoder_context *encoder_context,
6016                                int hme_type,
6017                                int scale_surface_type)
6018 {
6019     struct i965_driver_data *i965 = i965_driver_data(ctx);
6020     struct i965_gpe_table *gpe = &i965->gpe_table;
6021     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6022     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6023     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6024     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6025     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6026     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6027     VAStatsStatisticsParameter *stat_param = NULL;
6028     struct i965_gpe_context *gpe_context;
6029     struct scaling_param surface_param;
6030     struct object_surface *obj_surface = NULL;
6031     struct gpe_media_object_walker_parameter media_object_walker_param;
6032     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6033     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6034     int media_function = 0;
6035     int kernel_idx = 0;
6036     int enable_statistics_output;
6037
6038     stat_param_h264 = avc_state->stat_param;
6039     assert(stat_param_h264);
6040     stat_param = &stat_param_h264->stats_params;
6041     enable_statistics_output = !stat_param_h264->disable_statistics_output;
6042
6043     memset(&surface_param, 0, sizeof(struct scaling_param));
6044     media_function = INTEL_MEDIA_STATE_4X_SCALING;
6045     kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6046     downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6047     downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6048
6049     surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6050     surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6051     surface_param.output_frame_width = generic_state->frame_width_4x;
6052     surface_param.output_frame_height = generic_state->frame_height_4x;
6053     surface_param.use_4x_scaling  = 1 ;
6054     surface_param.use_16x_scaling = 0 ;
6055     surface_param.use_32x_scaling = 0 ;
6056     surface_param.enable_mb_flatness_check = enable_statistics_output;
6057     surface_param.enable_mb_variance_output = enable_statistics_output;
6058     surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6059     surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
6060
6061     switch (scale_surface_type) {
6062
6063     case  SCALE_CUR_PIC:
6064         surface_param.input_surface = encode_state->input_yuv_object ;
6065         surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6066
6067         if (enable_statistics_output) {
6068             surface_param.pres_mbv_proc_stat_buffer =
6069                 &avc_ctx->preproc_stat_data_out_buffer;
6070             surface_param.mbv_proc_stat_enabled = 1;
6071         } else {
6072             surface_param.mbv_proc_stat_enabled = 0;
6073             surface_param.pres_mbv_proc_stat_buffer = NULL;
6074         }
6075         break;
6076
6077     case SCALE_PAST_REF_PIC:
6078         obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6079         assert(obj_surface);
6080         surface_param.input_surface = obj_surface;
6081         surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6082
6083         if (stat_param->past_ref_stat_buf) {
6084             surface_param.pres_mbv_proc_stat_buffer =
6085                 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6086             surface_param.mbv_proc_stat_enabled = 1;
6087         } else {
6088             surface_param.mbv_proc_stat_enabled = 0;
6089             surface_param.pres_mbv_proc_stat_buffer = NULL;
6090         }
6091         break;
6092
6093     case SCALE_FUTURE_REF_PIC:
6094
6095         obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6096         assert(obj_surface);
6097         surface_param.input_surface = obj_surface;
6098         surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6099
6100         if (stat_param->future_ref_stat_buf) {
6101             surface_param.pres_mbv_proc_stat_buffer =
6102                 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6103             surface_param.mbv_proc_stat_enabled = 1;
6104         } else {
6105             surface_param.mbv_proc_stat_enabled = 0;
6106             surface_param.pres_mbv_proc_stat_buffer = NULL;
6107         }
6108         break;
6109     default :
6110         assert(0);
6111     }
6112
6113     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6114
6115     gpe->context_init(ctx, gpe_context);
6116     gpe->reset_binding_table(ctx, gpe_context);
6117
6118     generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6119
6120     surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6121     surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6122
6123     /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6124      * VAStatsStatisticsH264 will be used to store the output.  */
6125     surface_param.enable_mb_flatness_check = 0;
6126     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6127
6128     /* setup the interface data */
6129     gpe->setup_interface_data(ctx, gpe_context);
6130
6131     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6132     /* the scaling is based on 8x8 blk level */
6133     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6134     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6135     kernel_walker_param.no_dependency = 1;
6136
6137     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6138
6139     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6140                                             gpe_context,
6141                                             media_function,
6142                                             &media_object_walker_param);
6143
6144     return VA_STATUS_SUCCESS;
6145 }
6146
/**************** PreEnc HME *************************************/
/* The function used to run preenc hme is the same one used in avc encode:
 *       ==  gen9_avc_kernel_me()
 * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
 * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
 */
/* Fill the CURBE for the PreEnc HME kernel.
 *
 * PreEnc reuses the generic AVC ME kernel (gen9_avc_kernel_me()); only the
 * 4x layer is supported here (see the switch below), and the search
 * controls come from VAStatsStatisticsParameterH264 rather than the
 * regular encode picture/slice parameters.
 */
static void
gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct i965_gpe_context *gpe_context,
                             struct intel_encoder_context *encoder_context,
                             void * param)
{
    gen9_avc_fei_me_curbe_data *curbe_cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;

    struct me_param * curbe_param = (struct me_param *)param ;
    unsigned char  use_mv_from_prev_step = 0;
    unsigned char write_distortions = 0;
    unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
    unsigned char seach_table_idx = 0;
    unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    unsigned int scale_factor = 0;

    /* PreEnc has a single (4x) HME layer: no MV seeding from a coarser
     * layer, no distortion write-out, 4x downscale factor. */
    switch (curbe_param->hme_type) {
    case INTEL_ENC_HME_4x:
        use_mv_from_prev_step = 0;
        write_distortions = 0;
        mv_shift_factor = 2;
        scale_factor = 4;
        prev_mv_read_pos_factor = 0;
        break;

    default:
        assert(0);
    }

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
    if (!curbe_cmd)
        return;

    /* Frame size in macroblocks at the down-scaled (4x) resolution. */
    downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
    downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;

    memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));

    curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
    if (avc_state->field_scaling_output_interleaved) {
        /*frame set to zero,field specified*/
        curbe_cmd->dw3.src_access = 0;
        curbe_cmd->dw3.ref_access = 0;
        curbe_cmd->dw7.src_field_polarity = 0;
    }
    curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
    curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
    curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;

    curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
    curbe_cmd->dw6.write_distortions = write_distortions;
    curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
    curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only

    /* B frames additionally program L1 and switch to the B search method /
     * search table. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        curbe_cmd->dw1.bi_weight = 32;
        curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
        me_method = gen9_avc_b_me_method[generic_state->preset];
        seach_table_idx = 1;
    }

    if (generic_state->frame_type == SLICE_TYPE_P ||
        generic_state->frame_type == SLICE_TYPE_B)
        curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;

    curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
    curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;

    /* Copy the IME search path table (14 DWs) selected by frame type and
     * preset. */
    memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));

    /* Binding table indices for the kernel's surfaces. */
    curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
    curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
    curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
    curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
    curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
    curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
    curbe_cmd->dw38.reserved = 0;

    i965_gpe_context_unmap_curbe(gpe_context);
    return;
}
6241
6242 static void
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244                                 struct encode_state *encode_state,
6245                                 struct i965_gpe_context *gpe_context,
6246                                 struct intel_encoder_context *encoder_context,
6247                                 void * param)
6248 {
6249     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254     struct object_surface *input_surface;
6255     struct i965_gpe_resource *gpe_resource;
6256     struct me_param * curbe_param = (struct me_param *)param ;
6257     int i = 0;
6258
6259     /* PreEnc Only supports 4xme */
6260     assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6261
6262     switch (curbe_param->hme_type) {
6263     case INTEL_ENC_HME_4x : {
6264         /*memv output 4x*/
6265         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6267                                        gpe_resource,
6268                                        1,
6269                                        I965_SURFACEFORMAT_R8_UNORM,
6270                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6271
6272         /* memv distortion output*/
6273         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6275                                        gpe_resource,
6276                                        1,
6277                                        I965_SURFACEFORMAT_R8_UNORM,
6278                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6279
6280         /* brc distortion  output*/
6281         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6283                                        gpe_resource,
6284                                        1,
6285                                        I965_SURFACEFORMAT_R8_UNORM,
6286                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6287
6288         /* input past ref scaled YUV surface*/
6289         for (i = 0; i < stat_param->num_past_references; i++) {
6290             /*input current down scaled YUV surface for forward refef */
6291             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292             i965_add_adv_gpe_surface(ctx, gpe_context,
6293                                      input_surface,
6294                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6295
6296             input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297             i965_add_adv_gpe_surface(ctx, gpe_context,
6298                                      input_surface,
6299                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6300         }
6301
6302         /* input future ref scaled YUV surface*/
6303         for (i = 0; i < stat_param->num_future_references; i++) {
6304             /*input current down scaled YUV surface for backward ref */
6305             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306             i965_add_adv_gpe_surface(ctx, gpe_context,
6307                                      input_surface,
6308                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6309
6310             input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311             i965_add_adv_gpe_surface(ctx, gpe_context,
6312                                      input_surface,
6313                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6314         }
6315         break;
6316
6317     }
6318     default:
6319         break;
6320
6321     }
6322 }
6323
6324 /**************** PreEnc PreProc *************************************/
6325 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6326  * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6327  * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
6328  */
6329 static void
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331                                   struct encode_state *encode_state,
6332                                   struct i965_gpe_context *gpe_context,
6333                                   struct intel_encoder_context *encoder_context,
6334                                   void * param)
6335 {
6336     gen9_avc_preproc_curbe_data *cmd;
6337     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342     unsigned char me_method = 0;
6343     unsigned int table_idx = 0;
6344     int ref_width, ref_height, len_sp;
6345     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347     unsigned int preset = generic_state->preset;
6348
6349     cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6350     if (!cmd)
6351         return;
6352     memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6353
6354     switch (generic_state->frame_type) {
6355     case SLICE_TYPE_I:
6356         memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357                sizeof(gen9_avc_preproc_curbe_data));
6358         break;
6359     case SLICE_TYPE_P:
6360         memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361                sizeof(gen9_avc_preproc_curbe_data));
6362         break;
6363     case SLICE_TYPE_B:
6364         memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365                sizeof(gen9_avc_preproc_curbe_data));
6366         break;
6367     default:
6368         assert(0);
6369     }
6370     /* 4 means full search, 6 means diamand search */
6371     me_method  = (stat_param_h264->search_window == 5) ||
6372                  (stat_param_h264->search_window == 8) ? 4 : 6;
6373
6374     ref_width    = stat_param_h264->ref_width;
6375     ref_height   = stat_param_h264->ref_height;
6376     len_sp       = stat_param_h264->len_sp;
6377     /* If there is a serch_window, discard user provided ref_width, ref_height
6378      * and search_path length */
6379     switch (stat_param_h264->search_window) {
6380     case 0:
6381         /*  not use predefined search window, there should be a search_path input */
6382         if ((stat_param_h264->search_path != 0) &&
6383             (stat_param_h264->search_path != 1) &&
6384             (stat_param_h264->search_path != 2)) {
6385             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
6386             assert(0);
6387         }
6388         /* 4 means full search, 6 means diamand search */
6389         me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391             WARN_ONCE("Invalid input ref_width/ref_height in"
6392                       "SearchWindow=0 case! \n");
6393             assert(0);
6394         }
6395         break;
6396
6397     case 1:
6398         /* Tiny - 4 SUs 24x24 window */
6399         ref_width  = 24;
6400         ref_height = 24;
6401         len_sp     = 4;
6402         break;
6403
6404     case 2:
6405         /* Small - 9 SUs 28x28 window */
6406         ref_width  = 28;
6407         ref_height = 28;
6408         len_sp     = 9;
6409         break;
6410     case 3:
6411         /* Diamond - 16 SUs 48x40 window */
6412         ref_width  = 48;
6413         ref_height = 40;
6414         len_sp     = 16;
6415         break;
6416     case 4:
6417         /* Large Diamond - 32 SUs 48x40 window */
6418         ref_width  = 48;
6419         ref_height = 40;
6420         len_sp     = 32;
6421         break;
6422     case 5:
6423         /* Exhaustive - 48 SUs 48x40 window */
6424         ref_width  = 48;
6425         ref_height = 40;
6426         len_sp     = 48;
6427         break;
6428     case 6:
6429         /* Diamond - 16 SUs 64x32 window */
6430         ref_width  = 64;
6431         ref_height = 32;
6432         len_sp     = 16;
6433         break;
6434     case 7:
6435         /* Large Diamond - 32 SUs 64x32 window */
6436         ref_width  = 64;
6437         ref_height = 32;
6438         len_sp     = 32;
6439         break;
6440     case 8:
6441         /* Exhaustive - 48 SUs 64x32 window */
6442         ref_width  = 64;
6443         ref_height = 32;
6444         len_sp     = 48;
6445         break;
6446
6447     default:
6448         assert(0);
6449     }
6450
6451     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6452     if (is_bframe) {
6453         CLIP(ref_width, 4, 32);
6454         CLIP(ref_height, 4, 32);
6455     } else if (is_pframe) {
6456         CLIP(ref_width, 4, 64);
6457         CLIP(ref_height, 4, 32);
6458     }
6459
6460     cmd->dw0.adaptive_enable =
6461         cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462     cmd->dw2.max_len_sp = len_sp;
6463     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465     cmd->dw3.src_access =
6466         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6467
6468     if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469         cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6470     else
6471         cmd->dw3.ft_enable = 0;
6472
6473     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474     cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475     cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476     cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477     cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478     cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479     cmd->dw4.hme_enable = generic_state->hme_enabled;
6480     cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481     cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6482
6483     cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484         (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6485
6486     cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487     cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6488
6489     cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490     cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6491
6492     cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6493
6494     /* mv mode cost */
6495     memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6496
6497     /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498     memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6499
6500     /* search path tables */
6501     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6503
6504     if (stat_param_h264->intra_part_mask  == 0x07)
6505         cmd->dw31.intra_compute_type  = 3;
6506
6507     cmd->dw38.ref_threshold = 400;
6508     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6509
6510     if (generic_state->frame_type == SLICE_TYPE_I) {
6511         cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512         cmd->dw36.hme_combine_overlap = 0;
6513     } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515         cmd->dw3.bme_disable_fbr = 1;
6516         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518         cmd->dw7.non_skip_zmv_added = 1;
6519         cmd->dw7.non_skip_mode_added = 1;
6520         cmd->dw7.skip_center_mask = 1;
6521         cmd->dw32.max_vmv_r =
6522             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523         cmd->dw36.hme_combine_overlap = 1;
6524
6525     } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6526
6527         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528         cmd->dw3.search_ctrl = 0;
6529         cmd->dw3.skip_type = 1;
6530         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532         cmd->dw7.skip_center_mask = 0xff;
6533         cmd->dw32.max_vmv_r =
6534             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535         cmd->dw36.hme_combine_overlap = 1;
6536     }
6537
6538     cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539     cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540     cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541     cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542     cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543     cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544     cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545     cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546     cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6547
6548     i965_gpe_context_unmap_curbe(gpe_context);
6549 }
6550
6551 static void
6552 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6553                                      struct encode_state *encode_state,
6554                                      struct i965_gpe_context *gpe_context,
6555                                      struct intel_encoder_context *encoder_context,
6556                                      void * param)
6557 {
6558     struct i965_driver_data *i965 = i965_driver_data(ctx);
6559     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6560     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6561     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6562     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6563     struct object_surface *obj_surface;
6564     struct i965_gpe_resource *gpe_resource;
6565     VASurfaceID surface_id;
6566     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6567     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6568     unsigned int size = 0, frame_mb_nums = 0;
6569
6570     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6571
6572     /* input yuv surface, Y index */
6573     obj_surface = encode_state->input_yuv_object;
6574     i965_add_2d_gpe_surface(ctx,
6575                             gpe_context,
6576                             obj_surface,
6577                             0,
6578                             1,
6579                             I965_SURFACEFORMAT_R8_UNORM,
6580                             GEN9_AVC_PREPROC_CURR_Y_INDEX);
6581
6582     /* input yuv surface, UV index */
6583     i965_add_2d_gpe_surface(ctx,
6584                             gpe_context,
6585                             obj_surface,
6586                             1,
6587                             1,
6588                             I965_SURFACEFORMAT_R16_UINT,
6589                             GEN9_AVC_MBENC_CURR_UV_INDEX);
6590
6591
6592     if (generic_state->hme_enabled) {
6593         /* HME mv data buffer */
6594         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6595         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6596                                        gpe_resource,
6597                                        1,
6598                                        I965_SURFACEFORMAT_R8_UNORM,
6599                                        GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6600     }
6601
6602     /* mv predictor buffer */
6603     if (stat_param_h264->mv_predictor_ctrl) {
6604         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6605         gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6606         i965_add_buffer_gpe_surface(ctx,
6607                                     gpe_context,
6608                                     gpe_resource,
6609                                     0,
6610                                     size / 4,
6611                                     0,
6612                                     GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
6613     }
6614
6615     /* MB qp buffer */
6616     if (stat_param_h264->mb_qp) {
6617         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6618         gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6619         i965_add_buffer_gpe_surface(ctx,
6620                                     gpe_context,
6621                                     gpe_resource,
6622                                     0,
6623                                     size / 4,
6624                                     0,
6625                                     GEN9_AVC_PREPROC_MBQP_INDEX);
6626
6627         gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6628         size = 16 * AVC_QP_MAX * 4;
6629         i965_add_buffer_gpe_surface(ctx,
6630                                     gpe_context,
6631                                     gpe_resource,
6632                                     0,
6633                                     size / 4,
6634                                     0,
6635                                     GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6636
6637     }
6638
6639     /* mv data output buffer */
6640     if (!stat_param_h264->disable_mv_output) {
6641         gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6642         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6643         i965_add_buffer_gpe_surface(ctx,
6644                                     gpe_context,
6645                                     gpe_resource,
6646                                     0,
6647                                     size / 4,
6648                                     0,
6649                                     GEN9_AVC_PREPROC_MV_DATA_INDEX);
6650     }
6651
6652     /* statistics output buffer */
6653     if (!stat_param_h264->disable_statistics_output) {
6654         gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6655         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6656         i965_add_buffer_gpe_surface(ctx,
6657                                     gpe_context,
6658                                     gpe_resource,
6659                                     0,
6660                                     size / 4,
6661                                     0,
6662                                     GEN9_AVC_PREPROC_MB_STATS_INDEX);
6663     }
6664
6665     /* vme cur pic y */
6666     obj_surface = encode_state->input_yuv_object;
6667     i965_add_2d_gpe_surface(ctx,
6668                             gpe_context,
6669                             obj_surface,
6670                             0,
6671                             1,
6672                             I965_SURFACEFORMAT_R8_UNORM,
6673                             GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6674
6675     /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
6676     obj_surface = encode_state->input_yuv_object;
6677     i965_add_2d_gpe_surface(ctx,
6678                             gpe_context,
6679                             obj_surface,
6680                             0,
6681                             1,
6682                             I965_SURFACEFORMAT_R8_UNORM,
6683                             GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6684
6685     /* vme forward ref */
6686     /* Only supports one past ref */
6687     if (stat_param->num_past_references > 0) {
6688         surface_id = stat_param->past_references[0].picture_id;
6689         assert(surface_id != VA_INVALID_ID);
6690         obj_surface = SURFACE(surface_id);
6691         if (!obj_surface)
6692             return;
6693         i965_add_adv_gpe_surface(ctx, gpe_context,
6694                                  obj_surface,
6695                                  GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6696
6697     }
6698
6699     /* vme future ref */
6700     /* Only supports one future ref */
6701     if (stat_param->num_future_references > 0) {
6702         surface_id = stat_param->future_references[0].picture_id;
6703         assert(surface_id != VA_INVALID_ID);
6704         obj_surface = SURFACE(surface_id);
6705         if (!obj_surface)
6706             return;
6707         i965_add_adv_gpe_surface(ctx, gpe_context,
6708                                  obj_surface,
6709                                  GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
6710
6711         surface_id = stat_param->future_references[0].picture_id;
6712         assert(surface_id != VA_INVALID_ID);
6713         obj_surface = SURFACE(surface_id);
6714         if (!obj_surface)
6715             return;
6716         i965_add_adv_gpe_surface(ctx, gpe_context,
6717                                  obj_surface,
6718                                  GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
6719     }
6720
6721     return;
6722
6723 }
6724
6725 static VAStatus
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727                                struct encode_state *encode_state,
6728                                struct intel_encoder_context *encoder_context)
6729 {
6730     struct i965_driver_data *i965 = i965_driver_data(ctx);
6731     struct i965_gpe_table *gpe = &i965->gpe_table;
6732     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738     struct i965_gpe_context *gpe_context;
6739     struct gpe_media_object_walker_parameter media_object_walker_param;
6740     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741     int media_function = INTEL_MEDIA_STATE_PREPROC;
6742     struct i965_gpe_resource *gpe_resource = NULL;
6743     unsigned int * data = NULL;
6744     unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
6745
6746     gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747     gpe->context_init(ctx, gpe_context);
6748     gpe->reset_binding_table(ctx, gpe_context);
6749
6750     /*set curbe*/
6751     generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6752
6753     /*send surface*/
6754     generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6755
6756     gpe->setup_interface_data(ctx, gpe_context);
6757
6758     /*  Set up FtqLut Buffer if there is QP change within a frame */
6759     if (stat_param_h264->mb_qp) {
6760         gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761         assert(gpe_resource);
6762         data = i965_map_gpe_resource(gpe_resource);
6763         assert(data);
6764         memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
6765     }
6766
6767     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770     kernel_walker_param.no_dependency = 1;
6771
6772     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6773
6774     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6775                                             gpe_context,
6776                                             media_function,
6777                                             &media_object_walker_param);
6778
6779     return VA_STATUS_SUCCESS;
6780 }
6781
6782
/*
 * Fill the CURBE (kernel constant buffer) for the Gen8 AVC MbEnc kernel.
 *
 * The CURBE is first seeded from a per-slice-type init table (or from the
 * I-frame-distortion table when BRC distortion mode is in use), then patched
 * field by field from the current picture/slice parameters, encoder presets
 * and feature flags, and finally the binding-table indices for every surface
 * the kernel touches are written into dw65..dw86.
 *
 * param must point to a struct mbenc_param.  Returns silently if the CURBE
 * cannot be mapped, if the device is not Gen8, or if a B slice has an
 * invalid backward reference.
 */
static void
gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    gen8_avc_mbenc_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VASurfaceID surface_id;
    struct object_surface *obj_surface;

    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
    unsigned char qp = 0;
    unsigned char me_method = 0;
    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
    unsigned int table_idx = 0;
    unsigned int curbe_size = 0;

    unsigned int preset = generic_state->preset;
    /* Seed the whole CURBE from the matching init table before patching
     * individual fields below.  This function only supports Gen8. */
    if (IS_GEN8(i965->intel.device_info)) {
        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
        if (!cmd)
            return;
        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
        memset(cmd, 0, curbe_size);

        if (mbenc_i_frame_dist_in_use) {
            /* BRC I-frame distortion mode uses its own init table. */
            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
        } else {
            switch (generic_state->frame_type) {
            case SLICE_TYPE_I:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_P:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_B:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
                break;
            default:
                assert(0);
            }
        }
    } else {
        assert(0);

        return;
    }

    /* ME method and slice QP drive most of the cost/search settings below. */
    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;

    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
    cmd->dw38.max_len_sp = 0;

    /* Frame encoding only: src/ref access polarity is always 0. */
    cmd->dw3.src_access = 0;
    cmd->dw3.ref_access = 0;

    /* FTQ (fast transform quantization) is only meaningful for inter frames;
     * the preset table packs the P flag in bit 0 and the B flag in bit 1. */
    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
        //disable ftq_override by now.
        if (avc_state->ftq_override) {
            cmd->dw3.ftq_enable = avc_state->ftq_enable;

        } else {
            if (generic_state->frame_type == SLICE_TYPE_P) {
                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;

            } else {
                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
            }
        }
    } else {
        cmd->dw3.ftq_enable = 0;
    }

    if (avc_state->disable_sub_mb_partion)
        cmd->dw3.sub_mb_part_mask = 0x7;

    if (mbenc_i_frame_dist_in_use) {
        /* I-frame distortion runs on the 4x downscaled surface. */
        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
        cmd->dw6.batch_buffer_end = 0;
        cmd->dw31.intra_compute_type = 1;
    } else {
        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;

        {
            /* Mode/MV cost table (dw8..dw15) indexed by slice type and QP. */
            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
            } else if (avc_state->skip_bias_adjustment_enable) {
                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
                // No need to check for P picture as the flag is only enabled for P picture */
                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
            }
        }
        /* Search path table (dw16..dw31): row 1 for B slices, row 0 otherwise. */
        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
    }
    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
    cmd->dw4.field_parity_flag = 0;//bottom field
    cmd->dw4.enable_cur_fld_idr = 0;//field related
    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    cmd->dw4.hme_enable = generic_state->hme_enabled;
    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);

    /* Without 8x8 transform, mask off the intra 8x8 partition (bit 1). */
    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
    cmd->dw7.src_field_polarity = 0;//field related

    /*ftq_skip_threshold_lut set,dw14 /15*/

    /*r5 disable NonFTQSkipThresholdLUT*/
    if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    }

    cmd->dw13.qp_prime_y = qp;
    cmd->dw13.qp_prime_cb = qp;
    cmd->dw13.qp_prime_cr = qp;
    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable

    /* Multi-reference prediction: 128 disables a list, 1 enables it. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
        switch (gen9_avc_multi_pred[preset]) {
        case 0:
            cmd->dw32.mult_pred_l0_disable = 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 1:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 2:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        case 3:
            cmd->dw32.mult_pred_l0_disable = 1;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        }

    } else {
        cmd->dw32.mult_pred_l0_disable = 128;
        cmd->dw32.mult_pred_l1_disable = 128;
    }

    if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
        /* NOTE(review): same field assigned twice; the gen9 counterpart sets
         * list1_ref_id1_frm_field_parity on the second store.  Confirm against
         * the gen8 CURBE layout before changing. */
        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
    }

    cmd->dw34.b_original_bff = 0; //frame only
    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;

    if (cmd->dw34.force_non_skip_check) {
        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
    }

    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
    cmd->dw38.ref_threshold = 400;
    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
    /* Read factor 0 when per-MB QP data is supplied, 2 otherwise. */
    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;

    if (mbenc_i_frame_dist_in_use) {
        /* Distortion-only pass: neutral QP and no non-DC intra penalties. */
        cmd->dw13.qp_prime_y = 0;
        cmd->dw13.qp_prime_cb = 0;
        cmd->dw13.qp_prime_cr = 0;
        cmd->dw33.intra_16x16_nondc_penalty = 0;
        cmd->dw33.intra_8x8_nondc_penalty = 0;
        cmd->dw33.intra_4x4_nondc_penalty = 0;
    }
    if (cmd->dw4.use_actual_ref_qp_value) {
        /* Per-reference actual QPs for up to 8 L0 and 2 L1 references. */
        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
    }

    table_idx = slice_type_kernel[generic_state->frame_type];
    cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
    /* Slice-type specific search/cost programming: I, then P, else B. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        cmd->dw0.skip_mode_enable = 0;
        cmd->dw37.skip_mode_enable = 0;
        cmd->dw36.hme_combine_overlap = 0;
        cmd->dw47.intra_cost_sf = 16;
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = 0;

    } else if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw3.bme_disable_fbr = 1;
        cmd->dw5.ref_width = gen9_avc_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_search_y[preset];
        cmd->dw7.non_skip_zmv_added = 1;
        cmd->dw7.non_skip_mode_added = 1;
        cmd->dw7.skip_center_mask = 1;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
        cmd->dw36.hme_combine_overlap = 1;
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_search_y[preset];
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    } else {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw1.bi_weight = avc_state->bi_weight;
        cmd->dw3.search_ctrl = 7;
        cmd->dw3.skip_type = 1;
        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw7.skip_center_mask = 0xff;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
        cmd->dw36.hme_combine_overlap = 1;
        /* B slices need a valid backward (L1) reference. */
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface) {
            WARN_ONCE("Invalid backward reference frame\n");
            return;
        }
        cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
        if (cmd->dw34.enable_direct_bias_adjustment) {
            cmd->dw7.non_skip_zmv_added = 1;
            cmd->dw7.non_skip_mode_added = 1;
        }

        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    }
    /* Mirror the (possibly table-seeded) flag back into the driver state. */
    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;

    if (avc_state->rolling_intra_refresh_enable) {
        /*by now disable it*/
        if (generic_state->brc_enabled) {
            cmd->dw4.enable_intra_refresh = false;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
            cmd->dw48.widi_intra_refresh_mbx = 0;
            cmd->dw58.widi_intra_refresh_mby = 0;
        } else {
            cmd->dw4.enable_intra_refresh = true;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
        }
        cmd->dw32.mult_pred_l0_disable = 128;
        /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
         across one P frame to another P frame, as needed by the RollingI algo */
        cmd->dw48.widi_intra_refresh_mbx = 0;
        cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
        cmd->dw48.widi_intra_refresh_qp_delta = 0;

    } else {
        cmd->dw34.widi_intra_refresh_en = 0;
    }

    /*roi set disable by now. 49-56*/
    if (curbe_param->roi_enabled) {
        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;

        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;

        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;

        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;

        cmd->dw36.enable_cabac_work_around = 0;

        if (!generic_state->brc_enabled) {
            /* CQP mode: program per-ROI QP deltas, clamped so qp+delta stays
             * within [0, AVC_QP_MAX]. */
            char tmp = 0;
            tmp = generic_state->roi[0].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_1_dqp_prime_y = tmp;
            tmp = generic_state->roi[1].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_2_dqp_prime_y = tmp;
            tmp = generic_state->roi[2].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_3_dqp_prime_y = tmp;
            tmp = generic_state->roi[3].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_4_dqp_prime_y = tmp;
        } else {
            /* With BRC the kernel-side ROI path is disabled. */
            cmd->dw34.roi_enable_flag = 0;
        }
    }

    /* Binding-table indices for all surfaces used by the MbEnc kernel. */
    cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
    cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
    cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
    cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
    cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
    cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
    cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
    cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
    cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
    cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
    cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
    cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
    cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
    cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
    cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
    cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
    cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
    cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
    cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
    cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
    cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
    cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
7152
7153 static void
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155                              struct encode_state *encode_state,
7156                              struct i965_gpe_context *gpe_context,
7157                              struct intel_encoder_context *encoder_context,
7158                              void *param)
7159 {
7160     gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161     struct scaling_param *surface_param = (struct scaling_param *)param;
7162
7163     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7164
7165     if (!curbe_cmd)
7166         return;
7167
7168     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7169
7170     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
7171     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7172
7173     curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174     curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7175
7176     curbe_cmd->dw5.flatness_threshold = 0;
7177     if (surface_param->enable_mb_flatness_check) {
7178         curbe_cmd->dw5.flatness_threshold = 128;
7179         curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7180     }
7181
7182     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7185
7186     if (curbe_cmd->dw6.enable_mb_variance_output ||
7187         curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188         curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189         curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7190     }
7191
7192     i965_gpe_context_unmap_curbe(gpe_context);
7193     return;
7194 }
7195
7196 static void
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198                       struct encode_state *encode_state,
7199                       struct i965_gpe_context *gpe_context,
7200                       struct intel_encoder_context *encoder_context,
7201                       void * param)
7202 {
7203     gen8_avc_me_curbe_data *curbe_cmd;
7204     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7207
7208     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7209
7210     struct me_param * curbe_param = (struct me_param *)param ;
7211     unsigned char  use_mv_from_prev_step = 0;
7212     unsigned char write_distortions = 0;
7213     unsigned char qp_prime_y = 0;
7214     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215     unsigned char seach_table_idx = 0;
7216     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218     unsigned int scale_factor = 0;
7219
7220     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
7221     switch (curbe_param->hme_type) {
7222     case INTEL_ENC_HME_4x : {
7223         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224         write_distortions = 1;
7225         mv_shift_factor = 2;
7226         scale_factor = 4;
7227         prev_mv_read_pos_factor = 0;
7228         break;
7229     }
7230     case INTEL_ENC_HME_16x : {
7231         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232         write_distortions = 0;
7233         mv_shift_factor = 2;
7234         scale_factor = 16;
7235         prev_mv_read_pos_factor = 1;
7236         break;
7237     }
7238     case INTEL_ENC_HME_32x : {
7239         use_mv_from_prev_step = 0;
7240         write_distortions = 0;
7241         mv_shift_factor = 1;
7242         scale_factor = 32;
7243         prev_mv_read_pos_factor = 0;
7244         break;
7245     }
7246     default:
7247         assert(0);
7248
7249     }
7250     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7251
7252     if (!curbe_cmd)
7253         return;
7254
7255     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7257
7258     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7259
7260     curbe_cmd->dw3.sub_pel_mode = 3;
7261     if (avc_state->field_scaling_output_interleaved) {
7262         /*frame set to zero,field specified*/
7263         curbe_cmd->dw3.src_access = 0;
7264         curbe_cmd->dw3.ref_access = 0;
7265         curbe_cmd->dw7.src_field_polarity = 0;
7266     }
7267     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7270
7271     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272     curbe_cmd->dw6.write_distortions = write_distortions;
7273     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7275
7276     if (generic_state->frame_type == SLICE_TYPE_B) {
7277         curbe_cmd->dw1.bi_weight = 32;
7278         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279         me_method = gen9_avc_b_me_method[generic_state->preset];
7280         seach_table_idx = 1;
7281     }
7282
7283     if (generic_state->frame_type == SLICE_TYPE_P ||
7284         generic_state->frame_type == SLICE_TYPE_B)
7285         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7286
7287     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
7289
7290     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
7291
7292     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298     curbe_cmd->dw38.reserved = 0;
7299
7300     i965_gpe_context_unmap_curbe(gpe_context);
7301     return;
7302 }
7303
7304 static void
7305 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7306                                     struct encode_state *encode_state,
7307                                     struct i965_gpe_context *gpe_context,
7308                                     struct intel_encoder_context *encoder_context,
7309                                     void * param)
7310 {
7311     gen8_avc_frame_brc_update_curbe_data *cmd;
7312     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7313     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7314     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7315     struct object_surface *obj_surface;
7316     struct gen9_surface_avc *avc_priv_surface;
7317     struct avc_param common_param;
7318
7319     obj_surface = encode_state->reconstructed_object;
7320
7321     if (!obj_surface || !obj_surface->private_data)
7322         return;
7323     avc_priv_surface = obj_surface->private_data;
7324
7325     cmd = i965_gpe_context_map_curbe(gpe_context);
7326
7327     if (!cmd)
7328         return;
7329
7330     memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
7331
7332     cmd->dw5.target_size_flag = 0 ;
7333     if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7334         /*overflow*/
7335         generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7336         cmd->dw5.target_size_flag = 1 ;
7337     }
7338
7339     if (generic_state->skip_frame_enbale) {
7340         cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7341         cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
7342
7343         generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7344
7345     }
7346     cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7347     cmd->dw1.frame_number = generic_state->seq_frame_number ;
7348     cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7349     cmd->dw5.cur_frame_type = generic_state->frame_type ;
7350     cmd->dw5.brc_flag = 0 ;
7351     cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7352
7353     if (avc_state->multi_pre_enable) {
7354         cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7355         cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7356     }
7357
7358     cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
7359     if (avc_state->min_max_qp_enable) {
7360         switch (generic_state->frame_type) {
7361         case SLICE_TYPE_I:
7362             cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7363             cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7364             break;
7365         case SLICE_TYPE_P:
7366             cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7367             cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7368             break;
7369         case SLICE_TYPE_B:
7370             cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7371             cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7372             break;
7373         }
7374     } else {
7375         cmd->dw6.minimum_qp = 0 ;
7376         cmd->dw6.maximum_qp = 0 ;
7377     }
7378
7379     generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
7380
7381     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7382         cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
7383         cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
7384         cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
7385         cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
7386         cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7387         cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7388         cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7389         cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7390         cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7391         cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
7392
7393     }
7394
7395     memset(&common_param, 0, sizeof(common_param));
7396     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7397     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7398     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7399     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7400     common_param.frames_per_100s = generic_state->frames_per_100s;
7401     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7402     common_param.target_bit_rate = generic_state->target_bit_rate;
7403
7404     i965_gpe_context_unmap_curbe(gpe_context);
7405
7406     return;
7407 }
7408
7409 /*
7410 kernel related function:init/destroy etc
7411 */
7412 static void
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414                              struct generic_encoder_context *generic_context,
7415                              struct gen_avc_scaling_context *kernel_context,
7416                              int preenc_enabled)
7417 {
7418     struct i965_driver_data *i965 = i965_driver_data(ctx);
7419     struct i965_gpe_table *gpe = &i965->gpe_table;
7420     struct i965_gpe_context *gpe_context = NULL;
7421     struct encoder_kernel_parameter kernel_param ;
7422     struct encoder_scoreboard_parameter scoreboard_param;
7423     struct i965_kernel common_kernel;
7424
7425     memset(&kernel_param, 0, sizeof(kernel_param));
7426     if (IS_SKL(i965->intel.device_info) ||
7427         IS_BXT(i965->intel.device_info)) {
7428         if (!preenc_enabled) {
7429             kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430             kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7431         } else {
7432             /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433             kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434             kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7435         }
7436     } else if (IS_KBL(i965->intel.device_info) ||
7437                IS_GEN10(i965->intel.device_info) ||
7438                IS_GLK(i965->intel.device_info)) {
7439         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441     } else if (IS_GEN8(i965->intel.device_info)) {
7442         kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443         kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7444     } else
7445         assert(0);
7446
7447     /* 4x scaling kernel*/
7448     kernel_param.sampler_size = 0;
7449
7450     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451     scoreboard_param.mask = 0xFF;
7452     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454     scoreboard_param.walkpat_flag = 0;
7455
7456     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7459
7460     memset(&common_kernel, 0, sizeof(common_kernel));
7461
7462     generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463                                                 generic_context->enc_kernel_size,
7464                                                 INTEL_GENERIC_ENC_SCALING4X,
7465                                                 0,
7466                                                 &common_kernel);
7467
7468     gpe->load_kernels(ctx,
7469                       gpe_context,
7470                       &common_kernel,
7471                       1);
7472
7473     /* PreEnc using only the 4X scaling */
7474     if (preenc_enabled)
7475         return;
7476
7477     /*2x scaling kernel*/
7478     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479     kernel_param.inline_data_size = 0;
7480     kernel_param.sampler_size = 0;
7481
7482     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7485
7486     memset(&common_kernel, 0, sizeof(common_kernel));
7487
7488     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489                                          generic_context->enc_kernel_size,
7490                                          INTEL_GENERIC_ENC_SCALING2X,
7491                                          0,
7492                                          &common_kernel);
7493
7494     gpe->load_kernels(ctx,
7495                       gpe_context,
7496                       &common_kernel,
7497                       1);
7498
7499 }
7500
7501 static void
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503                         struct generic_encoder_context *generic_context,
7504                         struct gen_avc_me_context *kernel_context,
7505                         int preenc_enabled)
7506 {
7507     struct i965_driver_data *i965 = i965_driver_data(ctx);
7508     struct i965_gpe_table *gpe = &i965->gpe_table;
7509     struct i965_gpe_context *gpe_context = NULL;
7510     struct encoder_kernel_parameter kernel_param ;
7511     struct encoder_scoreboard_parameter scoreboard_param;
7512     struct i965_kernel common_kernel;
7513     int i = 0;
7514     unsigned int curbe_size = 0;
7515
7516     if (IS_GEN8(i965->intel.device_info)) {
7517         curbe_size = sizeof(gen8_avc_me_curbe_data);
7518     } else {
7519         if (!preenc_enabled)
7520             curbe_size = sizeof(gen9_avc_me_curbe_data);
7521         else
7522             curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7523     }
7524
7525     kernel_param.curbe_size = curbe_size;
7526     kernel_param.inline_data_size = 0;
7527     kernel_param.sampler_size = 0;
7528
7529     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530     scoreboard_param.mask = 0xFF;
7531     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533     scoreboard_param.walkpat_flag = 0;
7534
7535     /* There is two hme kernel, one for P and other for B frame */
7536     for (i = 0; i < 2; i++) {
7537         gpe_context = &kernel_context->gpe_contexts[i];
7538         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7540
7541         memset(&common_kernel, 0, sizeof(common_kernel));
7542
7543         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544                                                     generic_context->enc_kernel_size,
7545                                                     INTEL_GENERIC_ENC_ME,
7546                                                     i,
7547                                                     &common_kernel);
7548
7549         gpe->load_kernels(ctx,
7550                           gpe_context,
7551                           &common_kernel,
7552                           1);
7553     }
7554
7555 }
7556
7557 static void
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559                              struct generic_encoder_context *generic_context,
7560                              struct gen_avc_preproc_context *kernel_context)
7561 {
7562     struct i965_driver_data *i965 = i965_driver_data(ctx);
7563     struct i965_gpe_table *gpe = &i965->gpe_table;
7564     struct i965_gpe_context *gpe_context = NULL;
7565     struct encoder_kernel_parameter kernel_param ;
7566     struct encoder_scoreboard_parameter scoreboard_param;
7567     struct i965_kernel common_kernel;
7568
7569     kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570     kernel_param.inline_data_size = 0;
7571     kernel_param.sampler_size = 0;
7572
7573     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574     scoreboard_param.mask = 0xFF;
7575     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577     scoreboard_param.walkpat_flag = 0;
7578
7579     gpe_context = &kernel_context->gpe_contexts;
7580     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7582
7583     memset(&common_kernel, 0, sizeof(common_kernel));
7584
7585     intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586                                              generic_context->enc_kernel_size,
7587                                              INTEL_GENERIC_ENC_PREPROC,
7588                                              0,
7589                                              &common_kernel);
7590
7591     gpe->load_kernels(ctx,
7592                       gpe_context,
7593                       &common_kernel,
7594                       1);
7595
7596 }
7597
7598 static void
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600                            struct generic_encoder_context *generic_context,
7601                            struct gen_avc_mbenc_context *kernel_context,
7602                            int fei_enabled)
7603 {
7604     struct i965_driver_data *i965 = i965_driver_data(ctx);
7605     struct i965_gpe_table *gpe = &i965->gpe_table;
7606     struct i965_gpe_context *gpe_context = NULL;
7607     struct encoder_kernel_parameter kernel_param ;
7608     struct encoder_scoreboard_parameter scoreboard_param;
7609     struct i965_kernel common_kernel;
7610     int i = 0;
7611     unsigned int curbe_size = 0;
7612     unsigned int num_mbenc_kernels = 0;
7613
7614     if (IS_SKL(i965->intel.device_info) ||
7615         IS_BXT(i965->intel.device_info)) {
7616         if (!fei_enabled) {
7617             curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618             num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7619         } else {
7620             curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621             num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7622         }
7623     } else if (IS_KBL(i965->intel.device_info) ||
7624                IS_GEN10(i965->intel.device_info) ||
7625                IS_GLK(i965->intel.device_info)) {
7626         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628     } else if (IS_GEN8(i965->intel.device_info)) {
7629         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7631     }
7632
7633     assert(curbe_size > 0);
7634     kernel_param.curbe_size = curbe_size;
7635     kernel_param.inline_data_size = 0;
7636     kernel_param.sampler_size = 0;
7637
7638     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639     scoreboard_param.mask = 0xFF;
7640     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642     scoreboard_param.walkpat_flag = 0;
7643
7644     for (i = 0; i < num_mbenc_kernels ; i++) {
7645         gpe_context = &kernel_context->gpe_contexts[i];
7646         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7648
7649         memset(&common_kernel, 0, sizeof(common_kernel));
7650
7651         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652                                                     generic_context->enc_kernel_size,
7653                                                     INTEL_GENERIC_ENC_MBENC,
7654                                                     i,
7655                                                     &common_kernel);
7656
7657         gpe->load_kernels(ctx,
7658                           gpe_context,
7659                           &common_kernel,
7660                           1);
7661     }
7662
7663 }
7664
7665 static void
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667                          struct generic_encoder_context *generic_context,
7668                          struct gen_avc_brc_context *kernel_context)
7669 {
7670     struct i965_driver_data *i965 = i965_driver_data(ctx);
7671     struct i965_gpe_table *gpe = &i965->gpe_table;
7672     struct i965_gpe_context *gpe_context = NULL;
7673     struct encoder_kernel_parameter kernel_param ;
7674     struct encoder_scoreboard_parameter scoreboard_param;
7675     struct i965_kernel common_kernel;
7676     int num_brc_init_kernels = 0;
7677     int i = 0;
7678
7679     if (IS_GEN8(i965->intel.device_info)) {
7680         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7681     } else {
7682         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
7683     }
7684
7685     const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687         (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689         (sizeof(gen8_avc_mbenc_curbe_data)),
7690         0,
7691     };
7692     const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7697         0,
7698         (sizeof(gen9_avc_mb_brc_curbe_data))
7699     };
7700
7701     kernel_param.inline_data_size = 0;
7702     kernel_param.sampler_size = 0;
7703
7704     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705     scoreboard_param.mask = 0xFF;
7706     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708     scoreboard_param.walkpat_flag = 0;
7709
7710     for (i = 0; i < num_brc_init_kernels; i++) {
7711         if (IS_GEN8(i965->intel.device_info)) {
7712             kernel_param.curbe_size = gen8_brc_curbe_size[i];
7713         } else {
7714             kernel_param.curbe_size = gen9_brc_curbe_size[i];
7715         }
7716         gpe_context = &kernel_context->gpe_contexts[i];
7717         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7719
7720         memset(&common_kernel, 0, sizeof(common_kernel));
7721
7722         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723                                              generic_context->enc_kernel_size,
7724                                              INTEL_GENERIC_ENC_BRC,
7725                                              i,
7726                                              &common_kernel);
7727
7728         gpe->load_kernels(ctx,
7729                           gpe_context,
7730                           &common_kernel,
7731                           1);
7732     }
7733
7734 }
7735
7736 static void
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738                         struct generic_encoder_context *generic_context,
7739                         struct gen_avc_wp_context *kernel_context)
7740 {
7741     struct i965_driver_data *i965 = i965_driver_data(ctx);
7742     struct i965_gpe_table *gpe = &i965->gpe_table;
7743     struct i965_gpe_context *gpe_context = NULL;
7744     struct encoder_kernel_parameter kernel_param ;
7745     struct encoder_scoreboard_parameter scoreboard_param;
7746     struct i965_kernel common_kernel;
7747
7748     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749     kernel_param.inline_data_size = 0;
7750     kernel_param.sampler_size = 0;
7751
7752     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753     scoreboard_param.mask = 0xFF;
7754     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756     scoreboard_param.walkpat_flag = 0;
7757
7758     gpe_context = &kernel_context->gpe_contexts;
7759     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7761
7762     memset(&common_kernel, 0, sizeof(common_kernel));
7763
7764     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765                                          generic_context->enc_kernel_size,
7766                                          INTEL_GENERIC_ENC_WP,
7767                                          0,
7768                                          &common_kernel);
7769
7770     gpe->load_kernels(ctx,
7771                       gpe_context,
7772                       &common_kernel,
7773                       1);
7774
7775 }
7776
7777 static void
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779                          struct generic_encoder_context *generic_context,
7780                          struct gen_avc_sfd_context *kernel_context)
7781 {
7782     struct i965_driver_data *i965 = i965_driver_data(ctx);
7783     struct i965_gpe_table *gpe = &i965->gpe_table;
7784     struct i965_gpe_context *gpe_context = NULL;
7785     struct encoder_kernel_parameter kernel_param ;
7786     struct encoder_scoreboard_parameter scoreboard_param;
7787     struct i965_kernel common_kernel;
7788
7789     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790     kernel_param.inline_data_size = 0;
7791     kernel_param.sampler_size = 0;
7792
7793     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794     scoreboard_param.mask = 0xFF;
7795     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797     scoreboard_param.walkpat_flag = 0;
7798
7799     gpe_context = &kernel_context->gpe_contexts;
7800     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7802
7803     memset(&common_kernel, 0, sizeof(common_kernel));
7804
7805     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806                                          generic_context->enc_kernel_size,
7807                                          INTEL_GENERIC_ENC_SFD,
7808                                          0,
7809                                          &common_kernel);
7810
7811     gpe->load_kernels(ctx,
7812                       gpe_context,
7813                       &common_kernel,
7814                       1);
7815
7816 }
7817
7818 static void
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7820 {
7821
7822     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824     struct i965_gpe_table *gpe = &i965->gpe_table;
7825
7826     int i = 0;
7827
7828     gen9_avc_free_resources(vme_context);
7829
7830     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7832
7833     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7835
7836     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7838
7839     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7841
7842     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7843
7844     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7845
7846     gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
7847
7848 }
7849
7850 /*
7851 vme pipeline
7852 */
7853 static void
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7855                            VAProfile profile,
7856                            struct encode_state *encode_state,
7857                            struct intel_encoder_context *encoder_context)
7858 {
7859     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862     VAEncSequenceParameterBufferH264 *seq_param;
7863     VAEncSliceParameterBufferH264 *slice_param;
7864     VAEncMiscParameterBuffer *fei_misc_param;
7865     int i, j, slice_index;
7866     unsigned int preset = generic_state->preset;
7867     unsigned int fei_enabled = encoder_context->fei_enabled;
7868
7869     /* seq/pic/slice parameter setting */
7870     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7872
7873     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
7875
7876     if (fei_enabled &&
7877         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878         fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879         avc_state->fei_framectl_param =
7880             (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7881     }
7882
7883     avc_state->slice_num = 0;
7884     slice_index = 0;
7885     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888             avc_state->slice_param[slice_index] = slice_param;
7889             slice_param++;
7890             slice_index++;
7891             avc_state->slice_num++;
7892         }
7893     }
7894
7895     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896     seq_param = avc_state->seq_param;
7897     slice_param = avc_state->slice_param[0];
7898
7899     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7900
7901     if (slice_param->slice_type == SLICE_TYPE_I ||
7902         slice_param->slice_type == SLICE_TYPE_SI)
7903         generic_state->frame_type = SLICE_TYPE_I;
7904     else if (slice_param->slice_type == SLICE_TYPE_P)
7905         generic_state->frame_type = SLICE_TYPE_P;
7906     else if (slice_param->slice_type == SLICE_TYPE_B)
7907         generic_state->frame_type = SLICE_TYPE_B;
7908     if (profile == VAProfileH264High)
7909         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7910     else
7911         avc_state->transform_8x8_mode_enable = 0;
7912
7913     /* rc init*/
7914     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918         generic_state->frames_per_100s = 3000; /* 30fps */
7919     }
7920
7921     generic_state->gop_size = seq_param->intra_period;
7922     generic_state->gop_ref_distance = seq_param->ip_period;
7923
7924     if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925         generic_state->max_bit_rate = generic_state->target_bit_rate;
7926         generic_state->min_bit_rate = generic_state->target_bit_rate;
7927     }
7928
7929     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7931     }
7932
7933     generic_state->preset = encoder_context->quality_level;
7934     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935         generic_state->preset = INTEL_PRESET_RT_SPEED;
7936     }
7937     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7938
7939     if (!generic_state->brc_inited) {
7940         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7944     }
7945
7946
7947     generic_state->curr_pak_pass = 0;
7948     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7949
7950     if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951         generic_state->internal_rate_mode == VA_RC_VBR)
7952         generic_state->brc_enabled = 1;
7953     else
7954         generic_state->brc_enabled = 0;
7955
7956     if (generic_state->brc_enabled &&
7957         (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958          !generic_state->vbv_buffer_size_in_bit ||
7959          !generic_state->max_bit_rate ||
7960          !generic_state->target_bit_rate ||
7961          !generic_state->frames_per_100s)) {
7962         WARN_ONCE("Rate control parameter is required for BRC\n");
7963         generic_state->brc_enabled = 0;
7964     }
7965
7966     if (!generic_state->brc_enabled) {
7967         generic_state->target_bit_rate = 0;
7968         generic_state->max_bit_rate = 0;
7969         generic_state->min_bit_rate = 0;
7970         generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971         generic_state->vbv_buffer_size_in_bit = 0;
7972         generic_state->num_pak_passes = 1;
7973     } else {
7974         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7975     }
7976
7977
7978     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7982
7983     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
7986     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7987
7988     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
7991     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7992
7993     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
7996     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7997
7998     if (generic_state->hme_supported) {
7999         generic_state->hme_enabled = 1;
8000     } else {
8001         generic_state->hme_enabled = 0;
8002     }
8003
8004     if (generic_state->b16xme_supported) {
8005         generic_state->b16xme_enabled = 1;
8006     } else {
8007         generic_state->b16xme_enabled = 0;
8008     }
8009
8010     if (generic_state->b32xme_supported) {
8011         generic_state->b32xme_enabled = 1;
8012     } else {
8013         generic_state->b32xme_enabled = 0;
8014     }
8015     /* disable HME/16xME if the size is too small */
8016     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017         generic_state->b32xme_supported = 0;
8018         generic_state->b32xme_enabled = 0;
8019         generic_state->b16xme_supported = 0;
8020         generic_state->b16xme_enabled = 0;
8021         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8023     }
8024     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025         generic_state->b32xme_supported = 0;
8026         generic_state->b32xme_enabled = 0;
8027         generic_state->b16xme_supported = 0;
8028         generic_state->b16xme_enabled = 0;
8029         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8031     }
8032
8033     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034         generic_state->b32xme_supported = 0;
8035         generic_state->b32xme_enabled = 0;
8036         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8038     }
8039     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040         generic_state->b32xme_supported = 0;
8041         generic_state->b32xme_enabled = 0;
8042         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8044     }
8045
8046     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8049     }
8050     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8053     }
8054
8055 }
8056
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    /* Validate and derive per-frame encoder state before any GPE kernel runs:
     * maps the VA rate-control mode onto the internal mode, resolves the BRC
     * enable/reset flags, decides which optional kernels are used this frame
     * (HME, SFD, CAF, flatness check, weighted prediction distortion), checks
     * the slice layout and picks the inter rounding value.
     * Invalid or unsupported combinations are corrected in place; the
     * function always returns VA_STATUS_SUCCESS. */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    int i = 0;
    /* QP of the first slice = picture init QP plus the slice delta */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /* AVBR init (accuracy/convergence defaults) */
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Only CBR/VBR are supported internally; anything else falls back to CQP.
     * The low 7 bits isolate the base RC mode from modifier flags. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    /* Any true rate-control mode enables BRC and multi-pass PAK */
    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /* check brc parameter: per-MB QP input conflicts with BRC-driven QP */
    if (generic_state->brc_enabled) {
        avc_state->mb_qp_data_enable = 0;
    }

    /* set the brc init and reset accordingly */
    if (generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
         rate_control_mode == VA_RC_CQP)) {
        generic_state->brc_need_reset = 0;// reset not supported for CQP
    }
    /* Static frame detection is only used for non-BRC P/B frames */
    if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
        avc_state->sfd_enable = 0;
    } else {
        avc_state->sfd_enable = 1;
    }

    /* Clamp the BRC sliding window to at most one second of frames, capped at 60 */
    if (generic_state->frames_per_window_size == 0) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    }

    if (generic_state->brc_enabled) {
        /* HME only applies to inter frames */
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        /* With explicit min/max QP the BRC cannot re-encode, so one PAK pass */
        if (avc_state->min_max_qp_enable) {
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// ROI only in non-CQP modes
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    } else {
        generic_state->num_pak_passes = 1;// CQP needs only one pass
    }

    /* BRC I-frame distortion pass: run MbEnc in distortion mode for I frames */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /* CAF (check-all-fractional) check, per frame type and preset */
    if (avc_state->caf_supported) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            avc_state->caf_enable = 0;
            break;
        case SLICE_TYPE_P:
            /* bit 0 of the preset table controls P frames */
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            /* bit 1 of the preset table controls B frames */
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        /* Optionally disable CAF for HD (>= 1280x720) content */
        if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
            if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if (avc_state->flatness_check_supported) {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    } else {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enable: adaptive transform needs MB status output */
    if (avc_state->adaptive_transform_decision_enable) {
        avc_state->mb_status_enable = 1;
    } else {
        avc_state->mb_status_enable = 0;
    }
    /* slice check: all the slices use the same slice height except the last slice */
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for (i = 0; i < avc_state->slice_num; i++) {
        if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
            avc_state->arbitrary_num_mbs_in_slice = 1;
            avc_state->slice_height = 1; /* slice height will be ignored by kernel and here set it as default value */
        } else {
            avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        }
    }

    if (avc_state->slice_num > 1)
        avc_state->arbitrary_num_mbs_in_slice = 1;

    /* No motion estimation hierarchy for intra frames */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        generic_state->hme_enabled = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->b32xme_enabled = 0;
    }

    /* B frames need the distance scale factors for bi-prediction weighting */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
                                             && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality preset forces trellis quantization and MB-level BRC */
    if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if (generic_state->brc_enabled) {
            generic_state->mb_brc_enabled = 1;
        }
    }

    /* check the inter rounding; 255 (AVC_INVALID_ROUNDING_VALUE) means
     * "not set by the app", so fall back to adaptive/preset tables below */
    avc_state->rounding_value = 0;
    avc_state->rounding_inter_p = 255;//default
    avc_state->rounding_inter_b = 255; //default
    avc_state->rounding_inter_b_ref = 255; //default

    if (generic_state->frame_type == SLICE_TYPE_P) {
        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
            /* Adaptive rounding is QP-indexed and only valid without BRC */
            if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
                if (generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            } else {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        } else {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        /* B frames used as reference get their own rounding table */
        if (pic_param->pic_fields.bits.reference_pic_flag) {
            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        } else {
            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
                if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            } else {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }
    return VA_STATUS_SUCCESS;
}
8248
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    /* Bind all per-frame surfaces and buffers as GPE resources before the VME
     * kernels run: the reconstructed surface (and its direct-MV buffers), the
     * input YUV, the reference list with their DMV buffers and POCs, the coded
     * bitstream/status buffer, and the resolved L0/L1 reference index maps.
     * Returns a VA_STATUS_* error if any surface allocation/check fails or the
     * requested reference counts exceed the driver limits. */
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* Make sure the surface carries the AVC private data (DMV buffers etc.) */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface: frame_store_id, qp_value.
         * The last two DMV slots are reserved for the current frame
         * (top/bottom field); rebind them to this surface's DMV buffers. */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        /* bottom field POC = top field POC + 1 by convention here */
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: slot i owns DMV buffers 2*i (top) and 2*i+1 (bottom).
     * The list is contiguous; the first empty slot ends the scan. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream: payload starts after the coded-buffer header and is
     * clipped below the buffer end (page aligned) */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /* status buffer shares the coded buffer BO */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* clear the hardware status region at its base offset */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    /* frame id: it is the ref pic id in the reference_objects list.
     * Derive active reference counts from the picture parameters, with the
     * slice-level override taking precedence when flagged. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its index in reference_objects */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames) */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
8445
8446 static VAStatus
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448                              struct encode_state *encode_state,
8449                              struct intel_encoder_context *encoder_context)
8450 {
8451     return VA_STATUS_SUCCESS;
8452 }
8453
8454 static VAStatus
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456                               struct encode_state *encode_state,
8457                               struct intel_encoder_context *encoder_context)
8458 {
8459
8460     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8463
8464     /*set this flag when all kernel is finished*/
8465     if (generic_state->brc_enabled) {
8466         generic_state->brc_inited = 1;
8467         generic_state->brc_need_reset = 0;
8468         avc_state->mbenc_curbe_set_in_brc_update = 0;
8469     }
8470     return VA_STATUS_SUCCESS;
8471 }
8472
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    /* Launch the VME GPE kernels for one frame in their required order:
     * BRC init/reset, hierarchical downscaling, HME (coarsest first), SFD,
     * BRC frame/MB update, weighted prediction, and finally MbEnc.
     * The ordering is mandatory (e.g. BRC reset must precede HME because it
     * clears the BRC distortion surface). Always returns VA_STATUS_SUCCESS. */
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    int fei_enabled = encoder_context->fei_enabled;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /* down scaling: each level is only produced if the coarser one is supported */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /* ME kernels run coarsest-to-finest (32x -> 16x -> 4x) so each level can
     * seed the next with its motion vectors */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /* call SFD (static frame detection) kernel after HME in same command buffer */
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase: an optional
     * I-frame distortion MbEnc pass feeds the BRC frame update, then the
     * MB-level update runs if enabled */
    if (generic_state->brc_enabled) {
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /* weighted prediction, disabled by default; run the WP kernel per list
     * when the app requested explicit weights, otherwise clear the flags the
     * app should not have set */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /* MbEnc kernel: the main per-macroblock encoding pass, always last */
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
8564
8565 static VAStatus
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8567                       VAProfile profile,
8568                       struct encode_state *encode_state,
8569                       struct intel_encoder_context *encoder_context)
8570 {
8571     VAStatus va_status;
8572
8573     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8574
8575     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576     if (va_status != VA_STATUS_SUCCESS)
8577         return va_status;
8578
8579     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580     if (va_status != VA_STATUS_SUCCESS)
8581         return va_status;
8582
8583     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584     if (va_status != VA_STATUS_SUCCESS)
8585         return va_status;
8586
8587     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588     if (va_status != VA_STATUS_SUCCESS)
8589         return va_status;
8590
8591     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592     if (va_status != VA_STATUS_SUCCESS)
8593         return va_status;
8594
8595     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8596
8597     return VA_STATUS_SUCCESS;
8598 }
8599
8600 /* Update PreEnc specific parameters */
8601 static VAStatus
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8603                                   VAProfile profile,
8604                                   struct encode_state *encode_state,
8605                                   struct intel_encoder_context *encoder_context)
8606 {
8607     struct i965_driver_data *i965 = i965_driver_data(ctx);
8608     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613     VAStatsStatisticsParameter *stat_param = NULL;
8614     struct object_buffer *obj_buffer = NULL;
8615     struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616     struct buffer_store *buffer_store = NULL;
8617     unsigned int size = 0, i = 0;
8618     unsigned int frame_mb_nums = 0;
8619
8620     if (!encoder_context->preenc_enabled ||
8621         !encode_state->stat_param_ext ||
8622         !encode_state->stat_param_ext->buffer)
8623         return VA_STATUS_ERROR_OPERATION_FAILED;
8624
8625     stat_param_h264 = avc_state->stat_param =
8626                           (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627     stat_param = &stat_param_h264->stats_params;
8628
8629     /* Assume the frame type based on number of past/future ref frames */
8630     if (!stat_param->num_past_references && !stat_param->num_future_references)
8631         generic_state->frame_type = SLICE_TYPE_I;
8632     else if (stat_param->num_future_references > 0)
8633         generic_state->frame_type = SLICE_TYPE_B;
8634     else
8635         generic_state->frame_type = SLICE_TYPE_P;
8636
8637     generic_state->preset = INTEL_PRESET_RT_SPEED;
8638     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8639
8640     /* frame width and height */
8641     generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642     generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643     generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644     generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8645
8646     /* 4x downscaled width and height */
8647     generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
8650     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8651
8652     /* reset hme types for preenc */
8653     if (generic_state->frame_type != SLICE_TYPE_I)
8654         generic_state->hme_enabled = 1;
8655
8656     /* ensure frame width is not too small */
8657     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659         generic_state->downscaled_width_4x_in_mb =
8660             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8661     }
8662
8663     /* ensure frame height is not too small*/
8664     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666         generic_state->downscaled_height_4x_in_mb =
8667             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8668     }
8669
8670     /********** Ensure buffer object parameters ********/
8671     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8672
8673     /* mv predictor buffer */
8674     if (stat_param_h264->mv_predictor_ctrl) {
8675         if (stat_param->mv_predictor == VA_INVALID_ID)
8676             goto error;
8677         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678         obj_buffer = BUFFER(stat_param->mv_predictor);
8679         buffer_store = obj_buffer->buffer_store;
8680         if (buffer_store->bo->size < size)
8681             goto error;
8682         if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8683             i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8684         i965_dri_object_to_buffer_gpe_resource(
8685             &avc_ctx->preproc_mv_predictor_buffer,
8686             buffer_store->bo);
8687     }
8688
8689     /* MB qp buffer */
8690     if (stat_param_h264->mb_qp) {
8691         if (stat_param->qp == VA_INVALID_ID)
8692             goto error;
8693         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8694         obj_buffer = BUFFER(stat_param->qp);
8695         buffer_store = obj_buffer->buffer_store;
8696         if (buffer_store->bo->size < size)
8697             goto error;
8698         if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8699             i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8700         i965_dri_object_to_buffer_gpe_resource(
8701             &avc_ctx->preproc_mb_qp_buffer,
8702             buffer_store->bo);
8703     }
8704
8705     /* locate mv and stat buffer */
8706     if (!stat_param_h264->disable_mv_output ||
8707         !stat_param_h264->disable_statistics_output) {
8708
8709         if (!stat_param->outputs)
8710             goto error;
8711
8712         for (i = 0; i < 2 ; i++) {
8713             if (stat_param->outputs[i] != VA_INVALID_ID) {
8714                 obj_buffer = BUFFER(stat_param->outputs[i]);
8715                 switch (obj_buffer->type) {
8716                 case VAStatsMVBufferType:
8717                     obj_buffer_mv = obj_buffer;
8718                     break;
8719                 case VAStatsStatisticsBufferType:
8720                     obj_buffer_stat = obj_buffer;
8721                     break;
8722                 default:
8723                     assert(0);
8724                 }
8725             }
8726             if (!(!stat_param_h264->disable_mv_output &&
8727                   !stat_param_h264->disable_statistics_output))
8728                 break;
8729         }
8730     }
8731     /* mv data output buffer */
8732     if (!stat_param_h264->disable_mv_output) {
8733         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8734         buffer_store = obj_buffer_mv->buffer_store;
8735         if (buffer_store->bo->size < size)
8736             goto error;
8737         if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8738             i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8739         i965_dri_object_to_buffer_gpe_resource(
8740             &avc_ctx->preproc_mv_data_out_buffer,
8741             buffer_store->bo);
8742     }
8743     /* statistics output buffer */
8744     if (!stat_param_h264->disable_statistics_output) {
8745         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8746         buffer_store = obj_buffer_stat->buffer_store;
8747         if (buffer_store->bo->size < size)
8748             goto error;
8749         if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8750             i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8751         i965_dri_object_to_buffer_gpe_resource(
8752             &avc_ctx->preproc_stat_data_out_buffer,
8753             buffer_store->bo);
8754     }
8755
8756     /* past ref stat out buffer */
8757     if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8758         stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8759         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8760         obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8761         buffer_store = obj_buffer->buffer_store;
8762         if (buffer_store->bo->size < size)
8763             goto error;
8764         if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8765             i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8766         i965_dri_object_to_buffer_gpe_resource(
8767             &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8768             buffer_store->bo);
8769     }
8770     /* future ref stat out buffer */
8771     if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8772         stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8773         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8774         obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8775         buffer_store = obj_buffer->buffer_store;
8776         if (buffer_store->bo->size < size)
8777             goto error;
8778         if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8779             i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8780         i965_dri_object_to_buffer_gpe_resource(
8781             &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8782             buffer_store->bo);
8783     }
8784     return VA_STATUS_SUCCESS;
8785
8786 error:
8787     return VA_STATUS_ERROR_INVALID_BUFFER;
8788 }
8789
8790 /* allocate internal resouces required for PreEenc */
8791 static VAStatus
8792 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8793                                             struct encode_state *encode_state,
8794                                             struct intel_encoder_context *encoder_context)
8795 {
8796     struct i965_driver_data *i965 = i965_driver_data(ctx);
8797     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8798     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8799     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8800     unsigned int width  = 0;
8801     unsigned int height  = 0;
8802     unsigned int size  = 0;
8803     int allocate_flag = 1;
8804
8805     /* 4x MEMV data buffer */
8806     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8807     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8808     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8809     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8810                                                   &avc_ctx->s4x_memv_data_buffer,
8811                                                   width, height,
8812                                                   width,
8813                                                   "4x MEMV data buffer");
8814     if (!allocate_flag)
8815         goto failed_allocation;
8816     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8817
8818     /*  Output DISTORTION surface from 4x ME */
8819     width = generic_state->downscaled_width_4x_in_mb * 8;
8820     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8821     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8822     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8823                                                   &avc_ctx->s4x_memv_distortion_buffer,
8824                                                   width, height,
8825                                                   ALIGN(width, 64),
8826                                                   "4x MEMV distortion buffer");
8827     if (!allocate_flag)
8828         goto failed_allocation;
8829     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8830
8831     /* output BRC DISTORTION surface from 4x ME  */
8832     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8833     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8834     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8835     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8836                                                   &avc_ctx->res_brc_dist_data_surface,
8837                                                   width, height,
8838                                                   width,
8839                                                   "brc dist data buffer");
8840     if (!allocate_flag)
8841         goto failed_allocation;
8842     i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8843
8844
8845     /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
8846     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8847     size = 16 * AVC_QP_MAX * 4;
8848     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8849                                                &avc_ctx->res_mbbrc_const_data_buffer,
8850                                                ALIGN(size, 0x1000),
8851                                                "mbbrc const data buffer");
8852     if (!allocate_flag)
8853         goto failed_allocation;
8854     i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8855
8856     /* 4x downscaled surface  */
8857     if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8858         i965_CreateSurfaces(ctx,
8859                             generic_state->frame_width_4x,
8860                             generic_state->frame_height_4x,
8861                             VA_RT_FORMAT_YUV420,
8862                             1,
8863                             &avc_ctx->preenc_scaled_4x_surface_id);
8864         avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8865         if (!avc_ctx->preenc_scaled_4x_surface_obj)
8866             goto failed_allocation;
8867         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8868                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8869     }
8870
8871     /* 4x downscaled past ref surface  */
8872     if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8873         i965_CreateSurfaces(ctx,
8874                             generic_state->frame_width_4x,
8875                             generic_state->frame_height_4x,
8876                             VA_RT_FORMAT_YUV420,
8877                             1,
8878                             &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8879         avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8880             SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8881         if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8882             goto failed_allocation;
8883         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8884                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8885     }
8886
8887     /* 4x downscaled future ref surface  */
8888     if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8889         i965_CreateSurfaces(ctx,
8890                             generic_state->frame_width_4x,
8891                             generic_state->frame_height_4x,
8892                             VA_RT_FORMAT_YUV420,
8893                             1,
8894                             &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8895         avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8896             SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8897         if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8898             goto failed_allocation;
8899         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8900                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8901     }
8902
8903     /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8904      * the generic AVC Encdoe codepath which allocate status buffer as extension
8905      * to CodedBuffer */
8906     if (!avc_ctx->status_buffer.bo) {
8907         size =
8908             generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8909         size += I965_CODEDBUFFER_HEADER_SIZE;
8910         size += 0x1000;
8911         avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8912                                                  "Dummy Coded Buffer",
8913                                                  size, 64);
8914     }
8915
8916     return VA_STATUS_SUCCESS;
8917
8918 failed_allocation:
8919     return VA_STATUS_ERROR_ALLOCATION_FAILED;
8920 }
8921
8922
8923 static VAStatus
8924 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8925                                struct encode_state *encode_state,
8926                                struct intel_encoder_context *encoder_context)
8927 {
8928     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8929     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8930     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8931     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8932     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8933
8934     /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8935      * to avoid repeated scaling of same surfaces */
8936
8937     /* down scaling */
8938     gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8939                                    INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8940     if (stat_param->num_past_references > 0) {
8941         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8942                                        INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8943     }
8944     if (stat_param->num_future_references > 0) {
8945         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8946                                        INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8947     }
8948
8949     /* me kernel */
8950     if (generic_state->hme_enabled) {
8951         gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8952     }
8953
8954     /* preproc kernel */
8955     if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8956         gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8957     }
8958
8959     return VA_STATUS_SUCCESS;
8960 }
8961
8962 static VAStatus
8963 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8964                          VAProfile profile,
8965                          struct encode_state *encode_state,
8966                          struct intel_encoder_context *encoder_context)
8967 {
8968     VAStatus va_status;
8969
8970     va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8971     if (va_status != VA_STATUS_SUCCESS)
8972         return va_status;
8973
8974     va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8975     if (va_status != VA_STATUS_SUCCESS)
8976         return va_status;
8977
8978     va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8979     if (va_status != VA_STATUS_SUCCESS)
8980         return va_status;
8981
8982     return VA_STATUS_SUCCESS;
8983 }
8984
8985 static void
8986 gen9_avc_vme_context_destroy(void * context)
8987 {
8988     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8989     struct generic_encoder_context *generic_ctx;
8990     struct i965_avc_encoder_context *avc_ctx;
8991     struct generic_enc_codec_state *generic_state;
8992     struct avc_enc_state *avc_state;
8993
8994     if (!vme_context)
8995         return;
8996
8997     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8998     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8999     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9000     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
9001
9002     gen9_avc_kernel_destroy(vme_context);
9003
9004     free(generic_ctx);
9005     free(avc_ctx);
9006     free(generic_state);
9007     free(avc_state);
9008     free(vme_context);
9009     return;
9010
9011 }
9012
9013 static void
9014 gen8_avc_kernel_init(VADriverContextP ctx,
9015                      struct intel_encoder_context *encoder_context)
9016 {
9017     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9018     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9019     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9020     int fei_enabled = encoder_context->fei_enabled;
9021
9022     generic_ctx->get_kernel_header_and_size = fei_enabled ?
9023                                               intel_avc_fei_get_kernel_header_and_size :
9024                                               intel_avc_get_kernel_header_and_size ;
9025     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9026     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9027     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9028     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9029     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9030
9031     //function pointer
9032     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9033     generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9034     generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9035     generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9036     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9037     generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9038     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9039
9040     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9041     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9042     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9043     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9044     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9045     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9046 }
9047 static void
9048 gen9_avc_kernel_init(VADriverContextP ctx,
9049                      struct intel_encoder_context *encoder_context)
9050 {
9051     struct i965_driver_data *i965 = i965_driver_data(ctx);
9052     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9053     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9054     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9055     int fei_enabled = encoder_context->fei_enabled;
9056     int preenc_enabled = encoder_context->preenc_enabled;
9057
9058     generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
9059                                               intel_avc_fei_get_kernel_header_and_size :
9060                                               intel_avc_get_kernel_header_and_size ;
9061
9062     if (!fei_enabled && !preenc_enabled) {
9063         /* generic AVC Encoder */
9064         gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9065         gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9066         gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9067         gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9068                                    encoder_context->fei_enabled);
9069         gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
9070         gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9071
9072         //function pointer
9073         generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9074         generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9075         generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
9076         generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
9077         generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9078         generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
9079         generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
9080         generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9081         generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
9082
9083         generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9084         generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9085         generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9086         generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9087         generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9088         generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
9089         generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9090         generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
9091
9092         if (IS_SKL(i965->intel.device_info) ||
9093             IS_BXT(i965->intel.device_info))
9094             generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9095         else if (IS_KBL(i965->intel.device_info) ||
9096                  IS_GEN10(i965->intel.device_info) ||
9097                  IS_GLK(i965->intel.device_info))
9098             generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9099
9100     } else if (fei_enabled) {
9101         /* FEI AVC Encoding */
9102         gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9103                                    encoder_context->fei_enabled);
9104         generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
9105         generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
9106
9107     } else {
9108         /* PreEnc for AVC */
9109         gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
9110                                      encoder_context->preenc_enabled);
9111         gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
9112                                 encoder_context->preenc_enabled);
9113         gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
9114
9115         /* preenc 4x scaling uses the gen95 kernel */
9116         generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9117         generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
9118         generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
9119
9120         generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9121         generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
9122         generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
9123     }
9124 }
9125
9126 /*
9127 PAK pipeline related function
9128 */
9129 extern int
9130 intel_avc_enc_slice_type_fixup(int slice_type);
9131
9132 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
9133 static VAStatus
9134 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9135                                 struct encode_state *encode_state,
9136                                 struct intel_encoder_context *encoder_context)
9137 {
9138     struct i965_driver_data *i965 = i965_driver_data(ctx);
9139     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9140     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9141     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9142     unsigned int size  = 0;
9143     int allocate_flag = 1;
9144
9145     /*second level batch buffer for image state write when cqp etc*/
9146     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9147     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9148     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9149                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
9150                                                ALIGN(size, 0x1000),
9151                                                "second levle batch (image state write) buffer");
9152     if (!allocate_flag)
9153         goto failed_allocation;
9154
9155     if (!generic_state->brc_allocated) {
9156         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9157         size = 64;//44
9158         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9159                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9160                                                    ALIGN(size, 0x1000),
9161                                                    "brc pak statistic buffer");
9162         if (!allocate_flag)
9163             goto failed_allocation;
9164     }
9165
9166     return VA_STATUS_SUCCESS;
9167
9168 failed_allocation:
9169     return VA_STATUS_ERROR_ALLOCATION_FAILED;
9170 }
9171
/* Emit MFX_PIPE_MODE_SELECT (5 DWs) on the BCS ring, configuring the MFX
 * engine for AVC encode: long-format commands, encoding mode, and
 * pre-/post-deblocking output selected by whichever buffer is allocated.
 * Stream-out is enabled on every PAK pass except the last, so multi-pass
 * BRC can read back the per-MB statistics. */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    /* DW1: pipeline mode flags */
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    /* DW2: clock gating / error termination controls (all defaults) */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved/unused here */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
9212
/* Emit MFX_SURFACE_STATE (6 DWs) describing one NV12 surface (tiled,
 * Y-major, interleaved Cb/Cr) to the MFX engine.
 *
 * @gpe_resource  surface to describe (width/height/pitch/y_cb_offset read)
 * @id            hardware surface id written into DW1 */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    /* DW2: surface dimensions, both stored minus one */
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    /* NOTE(review): this DW appears to carry the Y offset for V(cr);
     * reusing y_cb_offset presumably works because NV12 interleaves
     * Cb/Cr in one plane — confirm against the MFX_SURFACE_STATE spec */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */

    ADVANCE_BCS_BATCH(batch);
}
9244
/* Emit MFX_PIPE_BUF_ADDR_STATE, programming every buffer address the MFX
 * engine needs for AVC PAK: deblocking outputs, the uncompressed input
 * surface, PAK MB status (read and write), row-store scratch buffers and
 * the reference picture list. The command is 65 DWs, extended to 68 on
 * Gen10 (CNL). DW indices below follow the hardware layout. */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;
    unsigned int cmd_len = 65;

    /* CNL adds three extra DWs at the end of this command */
    if (IS_GEN10(i965->intel.device_info))
        cmd_len = 68;

    BEGIN_BCS_BATCH(batch, cmd_len);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture (read-only source) */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 DWs each, 16 entries) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes (MOCS for all refs) */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read); same buffer as the write
     * slot above */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer (unused on encode) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer (unused on encode) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer (unused) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* 65-67 for CNL */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
9313
/* Program MFX_IND_OBJ_BASE_ADDR_STATE for AVC PAK: points the MFX engine at
 * the indirect MV input data of the reconstructed surface and at the
 * compressed-bitstream output buffer.  Bails out silently when there is no
 * reconstructed surface (or it has no encoder private data), since the
 * command cannot be built without the per-surface MV buffer. */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bistream offset (decode only, unused here) */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    /* 32 DWs * 4 bytes of MV data per macroblock — presumably matches the
     * VME MV output layout; TODO confirm against the MV surface allocation */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    /* upper bound address, page aligned */
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
9378
9379 static void
9380 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9381 {
9382     struct i965_driver_data *i965 = i965_driver_data(ctx);
9383     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9384     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9385     struct intel_batchbuffer *batch = encoder_context->base.batch;
9386
9387     BEGIN_BCS_BATCH(batch, 10);
9388
9389     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9390
9391     /* The DW1-3 is for bsd/mpc row store scratch buffer */
9392     OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9393
9394     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9395     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9396
9397     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9398     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9399
9400     ADVANCE_BCS_BATCH(batch);
9401 }
9402
/* Program MFX_AVC_DIRECTMODE_STATE: the direct-mode MV buffers for all
 * reference frames plus the current frame, followed by the POC table the
 * hardware uses for temporal direct-mode scaling. */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* One 64-bit address per reference frame; the last two slots of
     * res_direct_mv_buffersr belong to the current frame (written below). */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* no buffer allocated for this slot: emit a NULL 64-bit address */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory attributes (MOCS) for the reference MV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: DW38-69 are the reference POCs, DW70-71 the current frame's.
     * Only top_field_poc is emitted for every slot — presumably progressive
     * content is assumed here; TODO confirm for interlaced support. */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
9450
9451 static void
9452 gen9_mfc_qm_state(VADriverContextP ctx,
9453                   int qm_type,
9454                   const unsigned int *qm,
9455                   int qm_length,
9456                   struct intel_encoder_context *encoder_context)
9457 {
9458     struct intel_batchbuffer *batch = encoder_context->base.batch;
9459     unsigned int qm_buffer[16];
9460
9461     assert(qm_length <= 16);
9462     assert(sizeof(*qm) == 4);
9463     memset(qm_buffer, 0, 16 * 4);
9464     memcpy(qm_buffer, qm, qm_length * 4);
9465
9466     BEGIN_BCS_BATCH(batch, 18);
9467     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9468     OUT_BCS_BATCH(batch, qm_type << 0);
9469     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9470     ADVANCE_BCS_BATCH(batch);
9471 }
9472
9473 static void
9474 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9475                       struct encode_state *encode_state,
9476                       struct intel_encoder_context *encoder_context)
9477 {
9478     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9479     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9480     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
9481     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
9482
9483
9484     const unsigned int *qm_4x4_intra;
9485     const unsigned int *qm_4x4_inter;
9486     const unsigned int *qm_8x8_intra;
9487     const unsigned int *qm_8x8_inter;
9488
9489     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9490         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9491         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9492     } else {
9493         VAIQMatrixBufferH264 *qm;
9494         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9495         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9496         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9497         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9498         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9499         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
9500     }
9501
9502     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9503     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9504     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9505     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
9506 }
9507
9508 static void
9509 gen9_mfc_fqm_state(VADriverContextP ctx,
9510                    int fqm_type,
9511                    const unsigned int *fqm,
9512                    int fqm_length,
9513                    struct intel_encoder_context *encoder_context)
9514 {
9515     struct intel_batchbuffer *batch = encoder_context->base.batch;
9516     unsigned int fqm_buffer[32];
9517
9518     assert(fqm_length <= 32);
9519     assert(sizeof(*fqm) == 4);
9520     memset(fqm_buffer, 0, 32 * 4);
9521     memcpy(fqm_buffer, fqm, fqm_length * 4);
9522
9523     BEGIN_BCS_BATCH(batch, 34);
9524     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9525     OUT_BCS_BATCH(batch, fqm_type << 0);
9526     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9527     ADVANCE_BCS_BATCH(batch);
9528 }
9529
/* Build a forward quantizer matrix from a scaling list: each output entry
 * is the 16.16 fixed-point reciprocal of the corresponding input entry,
 * with rows and columns transposed (fqm[r][c] = 65536 / qm[c][r]).
 * NOTE(review): an input value of 1 yields 65536, which truncates to 0 in
 * the uint16_t output — presumably scaling lists never contain 1 here;
 * TODO confirm. */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            uint8_t q = qm[col * len + row];

            assert(q); /* division by zero would be UB */
            fqm[row * len + col] = (1 << 16) / q;
        }
    }
}
9540
/* Emit the four forward quantizer matrices via MFX_FQM_STATE.  When no
 * custom scaling lists are signalled, the precomputed flat FQM table is
 * used; otherwise each FQM is derived on the fly from the application's
 * scaling lists with gen9_mfc_fill_fqm().  Note the uint16_t FQM entries
 * are packed two-per-DW into the uint32_t scratch array before emission. */
static void
gen9_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        uint32_t fqm[32];
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* three 4x4 intra lists (Y, Cb, Cr), 16 uint16 entries each -> 24 DWs */
        for (i = 0; i < 3; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* three 4x4 inter lists (lists 3-5) */
        for (i = 3; i < 6; i++)
            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        /* one 8x8 intra list: 64 uint16 entries -> 32 DWs */
        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        /* one 8x8 inter list */
        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
9579
9580 static void
9581 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9582                            struct intel_encoder_context *encoder_context,
9583                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9584                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9585                            int slice_header_indicator,
9586                            struct intel_batchbuffer *batch)
9587 {
9588     if (data_bits_in_last_dw == 0)
9589         data_bits_in_last_dw = 32;
9590
9591     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9592
9593     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9594     OUT_BCS_BATCH(batch,
9595                   (0 << 16) |   /* always start at offset 0 */
9596                   (slice_header_indicator << 14) |
9597                   (data_bits_in_last_dw << 8) |
9598                   (skip_emul_byte_count << 4) |
9599                   (!!emulation_flag << 3) |
9600                   ((!!is_last_header) << 2) |
9601                   ((!!is_end_of_slice) << 1) |
9602                   (0 << 0));    /* check this flag */
9603     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9604
9605     ADVANCE_BCS_BATCH(batch);
9606 }
9607
/* Scan the raw packed data attached to the first slice for an Access Unit
 * Delimiter NAL and, if found, insert it into the bitstream before any
 * other header.  Only the first AUD found is emitted. */
static void
gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    unsigned char *nal_type = NULL;
    int count, i, start_index;

    /* raw packed-data buffers attached to slice 0 */
    count = encode_state->slice_rawdata_count[0];
    start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
        /* only raw data buffers may carry an AUD */
        if (param->type != VAEncPackedHeaderRawData)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* the NAL type byte sits just before the emulation-skip boundary */
        if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
            gen9_mfc_avc_insert_object(ctx,
                                       encoder_context,
                                       header_data,
                                       ALIGN(length_in_bits, 32) >> 5,
                                       length_in_bits & 0x1f,
                                       skip_emul_byte_cnt,
                                       0,
                                       0,
                                       !param->has_emulation_bytes,
                                       0,
                                       batch);
            break;
        }
    }
}
9653
/* Insert the packed data associated with one slice: first any raw packed
 * buffers (excluding AUDs, which were emitted earlier, and the slice
 * header, which must come last), then the slice header itself.  When the
 * application did not supply a packed slice header, the driver builds one
 * from the parameter buffers. */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* index 0 means "no packed slice header supplied by the app" */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * AUDs were already emitted ahead of all other headers */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        /* build_avc_slice_header() allocates slice_header; freed below */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
9760
9761 static void
9762 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9763                            struct encode_state *encode_state,
9764                            struct intel_encoder_context *encoder_context,
9765                            VAEncSliceParameterBufferH264 *slice_param,
9766                            int slice_index,
9767                            struct intel_batchbuffer *batch)
9768 {
9769     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9770     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9771     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9772     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9773     unsigned int skip_emul_byte_cnt;
9774
9775     if (slice_index == 0) {
9776
9777         /* if AUD exist and insert it firstly */
9778         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9779
9780         if (encode_state->packed_header_data[idx]) {
9781             VAEncPackedHeaderParameterBuffer *param = NULL;
9782             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9783             unsigned int length_in_bits;
9784
9785             assert(encode_state->packed_header_param[idx]);
9786             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9787             length_in_bits = param->bit_length;
9788
9789             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9790             gen9_mfc_avc_insert_object(ctx,
9791                                        encoder_context,
9792                                        header_data,
9793                                        ALIGN(length_in_bits, 32) >> 5,
9794                                        length_in_bits & 0x1f,
9795                                        skip_emul_byte_cnt,
9796                                        0,
9797                                        0,
9798                                        !param->has_emulation_bytes,
9799                                        0,
9800                                        batch);
9801         }
9802
9803         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9804
9805         if (encode_state->packed_header_data[idx]) {
9806             VAEncPackedHeaderParameterBuffer *param = NULL;
9807             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9808             unsigned int length_in_bits;
9809
9810             assert(encode_state->packed_header_param[idx]);
9811             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9812             length_in_bits = param->bit_length;
9813
9814             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9815
9816             gen9_mfc_avc_insert_object(ctx,
9817                                        encoder_context,
9818                                        header_data,
9819                                        ALIGN(length_in_bits, 32) >> 5,
9820                                        length_in_bits & 0x1f,
9821                                        skip_emul_byte_cnt,
9822                                        0,
9823                                        0,
9824                                        !param->has_emulation_bytes,
9825                                        0,
9826                                        batch);
9827         }
9828
9829         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9830
9831         if (encode_state->packed_header_data[idx]) {
9832             VAEncPackedHeaderParameterBuffer *param = NULL;
9833             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9834             unsigned int length_in_bits;
9835
9836             assert(encode_state->packed_header_param[idx]);
9837             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9838             length_in_bits = param->bit_length;
9839
9840             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9841             gen9_mfc_avc_insert_object(ctx,
9842                                        encoder_context,
9843                                        header_data,
9844                                        ALIGN(length_in_bits, 32) >> 5,
9845                                        length_in_bits & 0x1f,
9846                                        skip_emul_byte_cnt,
9847                                        0,
9848                                        0,
9849                                        !param->has_emulation_bytes,
9850                                        0,
9851                                        batch);
9852         } else if (internal_rate_mode == VA_RC_CBR) {
9853             /* insert others */
9854         }
9855     }
9856
9857     gen9_mfc_avc_insert_slice_packed_data(ctx,
9858                                           encode_state,
9859                                           encoder_context,
9860                                           slice_index,
9861                                           batch);
9862 }
9863
/* Program MFX_AVC_SLICE_STATE for one slice: slice type, reference counts,
 * weighted-prediction denominators, QP/deblocking parameters, slice MB
 * extents, rate-control flags and rounding controls.  next_slice_param is
 * NULL for the last slice of the frame. */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* convert the first MB address into (x, y) MB coordinates */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" start is the end of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        /* per-slice override of the picture-level reference count */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* BRC counters only carry over on re-PAK passes */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* panic mode is a last-pass safety net; not used with CQP or min/max QP */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: active reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    /* DW3: prediction / deblocking / QP controls */
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4-5: this slice's start position and the next slice's start position */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    /* DW6: stream-out / rate-control flags */
    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: slice data offset into the PAK-BSE indirect object buffer */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    /* DW8: QP clamp and grow/shrink thresholds (all zero here) */
    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    /* DW9: inter rounding and RC correction factors */
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
10012
10013 static uint8_t
10014 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10015 {
10016     unsigned int is_long_term =
10017         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10018     unsigned int is_top_field =
10019         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10020     unsigned int is_bottom_field =
10021         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
10022
10023     return ((is_long_term                         << 6) |
10024             (0 << 5) |
10025             (frame_store_id                       << 1) |
10026             ((is_top_field ^ 1) & is_bottom_field));
10027 }
10028
10029 static void
10030 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10031                            struct encode_state *encode_state,
10032                            struct intel_encoder_context *encoder_context,
10033                            VAEncSliceParameterBufferH264 *slice_param,
10034                            struct intel_batchbuffer *batch)
10035 {
10036     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10037     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10038     VAPictureH264 *ref_pic;
10039     int i, slice_type, ref_idx_shift;
10040     unsigned int fwd_ref_entry;
10041     unsigned int bwd_ref_entry;
10042
10043     /* max 4 ref frames are allowed for l0 and l1 */
10044     fwd_ref_entry = 0x80808080;
10045     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10046
10047     if ((slice_type == SLICE_TYPE_P) ||
10048         (slice_type == SLICE_TYPE_B)) {
10049         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10050             ref_pic = &slice_param->RefPicList0[i];
10051             ref_idx_shift = i * 8;
10052
10053             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10054             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
10055         }
10056     }
10057
10058     bwd_ref_entry = 0x80808080;
10059     if (slice_type == SLICE_TYPE_B) {
10060         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10061             ref_pic = &slice_param->RefPicList1[i];
10062             ref_idx_shift = i * 8;
10063
10064             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10065             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
10066         }
10067     }
10068
10069     if ((slice_type == SLICE_TYPE_P) ||
10070         (slice_type == SLICE_TYPE_B)) {
10071         BEGIN_BCS_BATCH(batch, 10);
10072         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10073         OUT_BCS_BATCH(batch, 0);                        // L0
10074         OUT_BCS_BATCH(batch, fwd_ref_entry);
10075
10076         for (i = 0; i < 7; i++) {
10077             OUT_BCS_BATCH(batch, 0x80808080);
10078         }
10079
10080         ADVANCE_BCS_BATCH(batch);
10081     }
10082
10083     if (slice_type == SLICE_TYPE_B) {
10084         BEGIN_BCS_BATCH(batch, 10);
10085         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10086         OUT_BCS_BATCH(batch, 1);                  //Select L1
10087         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
10088         for (i = 0; i < 7; i++) {
10089             OUT_BCS_BATCH(batch, 0x80808080);
10090         }
10091         ADVANCE_BCS_BATCH(batch);
10092     }
10093 }
10094
10095 static void
10096 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10097                                 struct encode_state *encode_state,
10098                                 struct intel_encoder_context *encoder_context,
10099                                 VAEncPictureParameterBufferH264 *pic_param,
10100                                 VAEncSliceParameterBufferH264 *slice_param,
10101                                 struct intel_batchbuffer *batch)
10102 {
10103     int i, slice_type;
10104     short weightoffsets[32 * 6];
10105
10106     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10107
10108     if (slice_type == SLICE_TYPE_P &&
10109         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10110         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10111         for (i = 0; i < 32; i++) {
10112             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10113             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10114             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10115             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10116             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10117             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10118         }
10119
10120         BEGIN_BCS_BATCH(batch, 98);
10121         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10122         OUT_BCS_BATCH(batch, 0);
10123         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10124
10125         ADVANCE_BCS_BATCH(batch);
10126     }
10127
10128     if (slice_type == SLICE_TYPE_B &&
10129         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10130         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10131         for (i = 0; i < 32; i++) {
10132             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10133             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10134             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10135             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10136             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10137             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10138         }
10139
10140         BEGIN_BCS_BATCH(batch, 98);
10141         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10142         OUT_BCS_BATCH(batch, 0);
10143         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10144         ADVANCE_BCS_BATCH(batch);
10145
10146         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10147         for (i = 0; i < 32; i++) {
10148             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10149             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10150             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10151             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10152             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10153             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10154         }
10155
10156         BEGIN_BCS_BATCH(batch, 98);
10157         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10158         OUT_BCS_BATCH(batch, 1);
10159         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10160         ADVANCE_BCS_BATCH(batch);
10161     }
10162 }
10163
/*
 * Build and insert the PAK commands for one slice.
 *
 * On the first PAK pass the per-slice command sequence (ref idx state,
 * weight/offset state, slice state, packed headers) is recorded into the
 * shared second-level batch buffer and its start offset is remembered.
 * On BRC re-encode passes the previously recorded sequence is reused from
 * the remembered offset.  Finally the slice commands and the VME-produced
 * MB code for this slice are chained in as second-level batch buffers.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record this slice's commands and remember where they start. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate the recorded sequence so it can be chained as a
         * second-level batch (padded to even DWord count). */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-encode pass: reuse the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice commands as a second-level batch */
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert the VME-produced MB code as a second-level batch; each MB
     * occupies 16 DWords (16 * 4 bytes) in the MB code surface */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
10238
10239 static void
10240 gen9_avc_pak_slice_level(VADriverContextP ctx,
10241                          struct encode_state *encode_state,
10242                          struct intel_encoder_context *encoder_context)
10243 {
10244     struct i965_driver_data *i965 = i965_driver_data(ctx);
10245     struct i965_gpe_table *gpe = &i965->gpe_table;
10246     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10247     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10248     struct intel_batchbuffer *batch = encoder_context->base.batch;
10249     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10250     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10251     int i, j;
10252     int slice_index = 0;
10253     int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
10254     int has_tail = 0;             /* check it later */
10255
10256     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10257         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10258
10259         if (j == encode_state->num_slice_params_ext - 1)
10260             next_slice_group_param = NULL;
10261         else
10262             next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
10263
10264         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10265             if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10266                 next_slice_param = slice_param + 1;
10267             else
10268                 next_slice_param = next_slice_group_param;
10269
10270             gen9_mfc_avc_single_slice(ctx,
10271                                       encode_state,
10272                                       encoder_context,
10273                                       slice_param,
10274                                       next_slice_param,
10275                                       slice_index);
10276             slice_param++;
10277             slice_index++;
10278
10279             if (is_frame_level)
10280                 break;
10281         }
10282
10283         if (is_frame_level)
10284             break;
10285     }
10286
10287     if (has_tail) {
10288         /* insert a tail if required */
10289     }
10290
10291     memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10292     mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10293     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
10294 }
/*
 * Program the picture-level MFX pipeline state for one PAK pass:
 * pipe mode select, surface states, buffer addresses, the image state
 * (BRC-updated or freshly generated), QM/FQM matrices and direct-mode
 * state.  On BRC re-encode passes the pass is skipped early when the
 * previous pass already met the size constraints.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* On re-encode passes, conditionally end the batch: if the image
         * status mask written by the previous pass indicates success, the
         * remaining commands of this pass are skipped by the hardware. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Fixed MFX programming order: mode select first, then surfaces
     * (0 = reconstructed, 4 = source input), then buffer addresses. */
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* Image state comes from the BRC kernel output; each PAK pass has
         * its own slot of INTEL_AVC_IMAGE_STATE_CMD_SIZE bytes. */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
10355
10356 static void
10357 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10358 {
10359     struct i965_driver_data *i965 = i965_driver_data(ctx);
10360     struct i965_gpe_table *gpe = &i965->gpe_table;
10361     struct intel_batchbuffer *batch = encoder_context->base.batch;
10362     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10363     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10364     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10365
10366     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
10367     struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
10368     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
10369     struct encoder_status_buffer_internal *status_buffer;
10370
10371     status_buffer = &(avc_ctx->status_buffer);
10372
10373     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10374     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10375
10376     /* read register and store into status_buffer and pak_statitistic info */
10377     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
10378     mi_store_reg_mem_param.bo = status_buffer->bo;
10379     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
10380     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10381     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10382
10383     mi_store_reg_mem_param.bo = status_buffer->bo;
10384     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
10385     mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
10386     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10387
10388     /*update the status in the pak_statistic_surface */
10389     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10390     mi_store_reg_mem_param.offset = 0;
10391     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10392     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10393
10394     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10395     mi_store_reg_mem_param.offset = 4;
10396     mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
10397     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10398
10399     memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
10400     mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10401     mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
10402     mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
10403     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
10404
10405     mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10406     mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
10407     mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10408     gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10409
10410     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10411     gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10412
10413     return;
10414 }
10415
10416 static void
10417 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10418                          struct intel_encoder_context *encoder_context)
10419 {
10420     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10421     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10422     unsigned int rate_control_mode = encoder_context->rate_control_mode;
10423
10424     switch (rate_control_mode & 0x7f) {
10425     case VA_RC_CBR:
10426         generic_state->internal_rate_mode = VA_RC_CBR;
10427         break;
10428
10429     case VA_RC_VBR:
10430         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10431         break;
10432
10433     case VA_RC_CQP:
10434     default:
10435         generic_state->internal_rate_mode = VA_RC_CQP;
10436         break;
10437     }
10438
10439     if (encoder_context->quality_level == 0)
10440         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10441 }
10442
/* allocate resources for PAK only (FEI mode) */
10444 static VAStatus
10445 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10446                                   VAProfile profile,
10447                                   struct encode_state *encode_state,
10448                                   struct intel_encoder_context *encoder_context)
10449 {
10450     VAStatus va_status;
10451     struct i965_driver_data *i965 = i965_driver_data(ctx);
10452     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10453     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10454     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10455     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10456     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10457     struct gen9_surface_avc *avc_priv_surface;
10458     VAEncPictureParameterBufferH264  *pic_param;
10459     VAEncSliceParameterBufferH264 *slice_param;
10460     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10461     unsigned int size = 0, i, j;
10462     unsigned int frame_mb_nums;
10463     struct object_buffer *obj_buffer = NULL;
10464     struct buffer_store *buffer_store = NULL;
10465     struct object_surface *obj_surface = NULL;
10466     struct avc_surface_param surface_param;
10467     struct i965_coded_buffer_segment *coded_buffer_segment;
10468     dri_bo *bo;
10469     unsigned char * pdata;
10470
10471     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10472
10473     pic_param = avc_state->pic_param;
10474     slice_param = avc_state->slice_param[0];
10475
10476     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10477     if (va_status != VA_STATUS_SUCCESS)
10478         return va_status;
10479
10480     va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10481     if (va_status != VA_STATUS_SUCCESS)
10482         return va_status;
10483
10484     /* Encoded bitstream ?*/
10485     obj_buffer = encode_state->coded_buf_object;
10486     bo = obj_buffer->buffer_store->bo;
10487     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10488     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10489     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10490     generic_ctx->compressed_bitstream.end_offset =
10491         ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
10492
10493     /*status buffer */
10494     dri_bo_unreference(avc_ctx->status_buffer.bo);
10495     avc_ctx->status_buffer.bo = bo;
10496     dri_bo_reference(bo);
10497
10498     /* set the internal flag to 0 to indicate the coded size is unknown */
10499     dri_bo_map(bo, 1);
10500     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10501     coded_buffer_segment->mapped = 0;
10502     coded_buffer_segment->codec = encoder_context->codec;
10503     coded_buffer_segment->status_support = 1;
10504
10505     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10506     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10507     dri_bo_unmap(bo);
10508     //frame id, it is the ref pic id in the reference_objects list.
10509     avc_state->num_refs[0] = 0;
10510     avc_state->num_refs[1] = 0;
10511     if (generic_state->frame_type == SLICE_TYPE_P) {
10512         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10513
10514         if (slice_param->num_ref_idx_active_override_flag)
10515             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10516     } else if (generic_state->frame_type == SLICE_TYPE_B) {
10517         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10518         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10519
10520         if (slice_param->num_ref_idx_active_override_flag) {
10521             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10522             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
10523         }
10524     }
10525     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10526         VAPictureH264 *va_pic;
10527
10528         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10529         avc_state->list_ref_idx[0][i] = 0;
10530
10531         if (i >= avc_state->num_refs[0])
10532             continue;
10533
10534         va_pic = &slice_param->RefPicList0[i];
10535
10536         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10537             obj_surface = encode_state->reference_objects[j];
10538
10539             if (obj_surface &&
10540                 obj_surface->bo &&
10541                 obj_surface->base.id == va_pic->picture_id) {
10542
10543                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10544                 avc_state->list_ref_idx[0][i] = j;
10545
10546                 break;
10547             }
10548         }
10549     }
10550     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10551         VAPictureH264 *va_pic;
10552
10553         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10554         avc_state->list_ref_idx[1][i] = 0;
10555
10556         if (i >= avc_state->num_refs[1])
10557             continue;
10558
10559         va_pic = &slice_param->RefPicList1[i];
10560
10561         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10562             obj_surface = encode_state->reference_objects[j];
10563
10564
10565             if (obj_surface &&
10566                 obj_surface->bo &&
10567                 obj_surface->base.id == va_pic->picture_id) {
10568
10569                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10570                 avc_state->list_ref_idx[1][i] = j;
10571
10572                 break;
10573                 break;
10574             }
10575         }
10576     }
10577
10578     obj_surface = encode_state->reconstructed_object;
10579     fei_param = avc_state->fei_framectl_param;
10580     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10581
10582     /* Setup current reconstruct frame */
10583     obj_surface = encode_state->reconstructed_object;
10584     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10585
10586     if (va_status != VA_STATUS_SUCCESS)
10587         return va_status;
10588
10589     memset(&surface_param, 0, sizeof(surface_param));
10590     surface_param.frame_width = generic_state->frame_width_in_pixel;
10591     surface_param.frame_height = generic_state->frame_height_in_pixel;
10592     va_status = gen9_avc_init_check_surfaces(ctx,
10593                                              obj_surface, encoder_context,
10594                                              &surface_param);
10595     avc_priv_surface = obj_surface->private_data;
10596
10597     /* res_mb_code_surface for MB code */
10598     /* PAK only mode must have the mb_code_surface from middleware,
10599      * so the code shouldn't reach here without an externally provided
10600      * MB Code buffer */
10601     assert(fei_param->mb_code_data != VA_INVALID_ID);
10602     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10603     obj_buffer = BUFFER(fei_param->mb_code_data);
10604     assert(obj_buffer != NULL);
10605     buffer_store = obj_buffer->buffer_store;
10606     assert(size <= buffer_store->bo->size);
10607     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10608         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10609     i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10610                                            buffer_store->bo);
10611     /* res_mv_data_surface for MV data */
10612     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10613     if (fei_param->mv_data != VA_INVALID_ID) {
10614         obj_buffer = BUFFER(fei_param->mv_data);
10615         assert(obj_buffer != NULL);
10616         buffer_store = obj_buffer->buffer_store;
10617         assert(size <= buffer_store->bo->size);
10618         if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10619             i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10620         i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10621                                                buffer_store->bo);
10622     }
10623
10624     return VA_STATUS_SUCCESS;
10625
10626 }
10627
/* Prepare every surface and buffer the PAK (bitstream packing) hardware
 * stage needs for the current frame: the reconstructed and input YUV
 * surfaces, reference surfaces and their direct-MV buffers, the 2nd-level
 * slice batch buffer, and the MFC row-store / status scratch buffers.
 * Called once per frame before any PAK commands are emitted.
 *
 * Returns VA_STATUS_SUCCESS, or an allocation/validation error code. */
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;

    struct gen9_surface_avc *avc_priv_surface;
    struct avc_surface_param surface_param;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size, w_mb, h_mb;

    /* PAK-only FEI mode takes its MB code/MV buffers from the middleware;
     * bind those externally provided buffers first. */
    if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
        va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    w_mb = generic_state->frame_width_in_mbs;
    h_mb = generic_state->frame_height_in_mbs;

    /* Scan all slice parameters; in-loop deblocking is considered enabled
     * for the frame as soon as any slice does not fully disable it
     * (disable_deblocking_filter_idc != 1).  The outer loop stops early
     * once enable_avc_ildb is set. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* Ensures the per-surface private data (DMV buffers etc.) exists. */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* Initialize the reconstructed surface's private members
     * (frame_store_id, qp_value, POCs) and bind its direct-MV buffers to
     * the last two DMV slots, which are reserved for the current frame. */
    {
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        /* NOTE(review): QP is derived from the first slice only — assumes
         * a uniform slice_qp_delta across slices for this purpose. */
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* The MFC writes the reconstructed picture either after or before the
     * deblocking filter, depending on whether in-loop deblocking is on. */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each reference and its top/bottom
     * direct-MV buffers (DMV slots 2*i and 2*i+1); stop at the first
     * empty reference slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* Recreate the 2nd-level batch buffer used for per-slice commands,
     * sized at one 4KB page per slice. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }

    /* MFC row-store and status scratch buffers; sizes are per-MB-row
     * (64 bytes per MB column) except the per-MB status buffer. */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    /* 16 bytes of status per macroblock. */
    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
10825
/* Run the PAK stage for one frame: prepare all PAK resources, then emit
 * the picture- and slice-level MFC commands on the BSD (video) ring for
 * each BRC PAK pass, reading back the MFC status registers after every
 * pass so BRC can evaluate/re-encode.  Finally flushes the batch and
 * advances the frame counters.
 *
 * Returns VA_STATUS_SUCCESS, or the error from resource preparation. */
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* On parts with two BSD rings, pin the batch to BSD ring 0 so the
     * status reads match the ring the commands execute on. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        /* Capture byte counts / image status so BRC can decide whether
         * the next pass needs different QPs. */
        gen9_avc_read_mfc_status(ctx, encoder_context);
    }

    /* The 2nd-level slice batch is per-frame; release it now that all
     * passes have referenced it. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
10883
10884 static VAStatus
10885 gen9_avc_pak_pipeline(VADriverContextP ctx,
10886                       VAProfile profile,
10887                       struct encode_state *encode_state,
10888                       struct intel_encoder_context *encoder_context)
10889 {
10890     VAStatus vaStatus;
10891
10892     switch (profile) {
10893     case VAProfileH264ConstrainedBaseline:
10894     case VAProfileH264Main:
10895     case VAProfileH264High:
10896     case VAProfileH264MultiviewHigh:
10897     case VAProfileH264StereoHigh:
10898         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10899         break;
10900
10901     default:
10902         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
10903         break;
10904     }
10905
10906     return vaStatus;
10907 }
10908
10909 static void
10910 gen9_avc_pak_context_destroy(void * context)
10911 {
10912     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10913     struct generic_encoder_context * generic_ctx;
10914     struct i965_avc_encoder_context * avc_ctx;
10915     int i = 0;
10916
10917     if (!pak_context)
10918         return;
10919
10920     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10921     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10922
10923     // other things
10924     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10925     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10926     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10927     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10928
10929     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10930     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10931     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10932     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10933     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10934
10935     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10936         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10937     }
10938
10939     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10940         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
10941     }
10942
10943     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10944         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10945         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10946     }
10947
10948 }
10949
10950 static VAStatus
10951 gen9_avc_get_coded_status(VADriverContextP ctx,
10952                           struct intel_encoder_context *encoder_context,
10953                           struct i965_coded_buffer_segment *coded_buf_seg)
10954 {
10955     struct encoder_status *avc_encode_status;
10956
10957     if (!encoder_context || !coded_buf_seg)
10958         return VA_STATUS_ERROR_INVALID_BUFFER;
10959
10960     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10961     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10962
10963     return VA_STATUS_SUCCESS;
10964 }
10965
10966 Bool
10967 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10968 {
10969     /* VME & PAK share the same context */
10970     struct i965_driver_data *i965 = i965_driver_data(ctx);
10971     struct encoder_vme_mfc_context * vme_context = NULL;
10972     struct generic_encoder_context * generic_ctx = NULL;
10973     struct i965_avc_encoder_context * avc_ctx = NULL;
10974     struct generic_enc_codec_state * generic_state = NULL;
10975     struct avc_enc_state * avc_state = NULL;
10976     struct encoder_status_buffer_internal *status_buffer;
10977     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10978
10979     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10980     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10981     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10982     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10983     avc_state = calloc(1, sizeof(struct avc_enc_state));
10984
10985     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10986         goto allocate_structure_failed;
10987
10988     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10989     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10990     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10991     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10992     memset(avc_state, 0, sizeof(struct avc_enc_state));
10993
10994     encoder_context->vme_context = vme_context;
10995     vme_context->generic_enc_ctx = generic_ctx;
10996     vme_context->private_enc_ctx = avc_ctx;
10997     vme_context->generic_enc_state = generic_state;
10998     vme_context->private_enc_state = avc_state;
10999
11000     if (IS_SKL(i965->intel.device_info) ||
11001         IS_BXT(i965->intel.device_info)) {
11002         if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11003             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11004             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11005         } else {
11006             /* FEI and PreEnc operation kernels are included in
11007             * the monolithic kernel binary */
11008             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11009             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11010         }
11011     } else if (IS_GEN8(i965->intel.device_info)) {
11012         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11013         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11014     } else if (IS_KBL(i965->intel.device_info) ||
11015                IS_GLK(i965->intel.device_info)) {
11016         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11017         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11018     } else if (IS_GEN10(i965->intel.device_info)) {
11019         generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11020         generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
11021     } else
11022         goto allocate_structure_failed;
11023
11024     /* initialize misc ? */
11025     avc_ctx->ctx = ctx;
11026     generic_ctx->use_hw_scoreboard = 1;
11027     generic_ctx->use_hw_non_stalling_scoreboard = 1;
11028
11029     /* initialize generic state */
11030
11031     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11032     generic_state->preset = INTEL_PRESET_RT_SPEED;
11033     generic_state->seq_frame_number = 0;
11034     generic_state->total_frame_number = 0;
11035     generic_state->frame_type = 0;
11036     generic_state->first_frame = 1;
11037
11038     generic_state->frame_width_in_pixel = 0;
11039     generic_state->frame_height_in_pixel = 0;
11040     generic_state->frame_width_in_mbs = 0;
11041     generic_state->frame_height_in_mbs = 0;
11042     generic_state->frame_width_4x = 0;
11043     generic_state->frame_height_4x = 0;
11044     generic_state->frame_width_16x = 0;
11045     generic_state->frame_height_16x = 0;
11046     generic_state->frame_width_32x = 0;
11047     generic_state->downscaled_width_4x_in_mb = 0;
11048     generic_state->downscaled_height_4x_in_mb = 0;
11049     generic_state->downscaled_width_16x_in_mb = 0;
11050     generic_state->downscaled_height_16x_in_mb = 0;
11051     generic_state->downscaled_width_32x_in_mb = 0;
11052     generic_state->downscaled_height_32x_in_mb = 0;
11053
11054     generic_state->hme_supported = 1;
11055     generic_state->b16xme_supported = 1;
11056     generic_state->b32xme_supported = 0;
11057     generic_state->hme_enabled = 0;
11058     generic_state->b16xme_enabled = 0;
11059     generic_state->b32xme_enabled = 0;
11060
11061     if (encoder_context->fei_enabled) {
11062         /* Disabling HME in FEI encode */
11063         generic_state->hme_supported = 0;
11064         generic_state->b16xme_supported = 0;
11065     } else if (encoder_context->preenc_enabled) {
11066         /* Disabling 16x16ME in PreEnc */
11067         generic_state->b16xme_supported = 0;
11068     }
11069
11070     generic_state->brc_distortion_buffer_supported = 1;
11071     generic_state->brc_constant_buffer_supported = 0;
11072
11073     generic_state->frame_rate = 30;
11074     generic_state->brc_allocated = 0;
11075     generic_state->brc_inited = 0;
11076     generic_state->brc_need_reset = 0;
11077     generic_state->is_low_delay = 0;
11078     generic_state->brc_enabled = 0;//default
11079     generic_state->internal_rate_mode = 0;
11080     generic_state->curr_pak_pass = 0;
11081     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11082     generic_state->is_first_pass = 1;
11083     generic_state->is_last_pass = 0;
11084     generic_state->mb_brc_enabled = 0; // enable mb brc
11085     generic_state->brc_roi_enable = 0;
11086     generic_state->brc_dirty_roi_enable = 0;
11087     generic_state->skip_frame_enbale = 0;
11088
11089     generic_state->target_bit_rate = 0;
11090     generic_state->max_bit_rate = 0;
11091     generic_state->min_bit_rate = 0;
11092     generic_state->init_vbv_buffer_fullness_in_bit = 0;
11093     generic_state->vbv_buffer_size_in_bit = 0;
11094     generic_state->frames_per_100s = 0;
11095     generic_state->gop_size = 0;
11096     generic_state->gop_ref_distance = 0;
11097     generic_state->brc_target_size = 0;
11098     generic_state->brc_mode = 0;
11099     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11100     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11101     generic_state->brc_init_reset_buf_size_in_bits = 0;
11102     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11103     generic_state->frames_per_window_size = 0;//default
11104     generic_state->target_percentage = 0;
11105
11106     generic_state->avbr_curracy = 0;
11107     generic_state->avbr_convergence = 0;
11108
11109     generic_state->num_skip_frames = 0;
11110     generic_state->size_skip_frames = 0;
11111
11112     generic_state->num_roi = 0;
11113     generic_state->max_delta_qp = 0;
11114     generic_state->min_delta_qp = 0;
11115
11116     if (encoder_context->rate_control_mode != VA_RC_NONE &&
11117         encoder_context->rate_control_mode != VA_RC_CQP) {
11118         generic_state->brc_enabled = 1;
11119         generic_state->brc_distortion_buffer_supported = 1;
11120         generic_state->brc_constant_buffer_supported = 1;
11121         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11122     }
11123     /*avc state initialization */
11124     avc_state->mad_enable = 0;
11125     avc_state->mb_disable_skip_map_enable = 0;
11126     avc_state->sfd_enable = 1;//default
11127     avc_state->sfd_mb_enable = 1;//set it true
11128     avc_state->adaptive_search_window_enable = 1;//default
11129     avc_state->mb_qp_data_enable = 0;
11130     avc_state->intra_refresh_i_enable = 0;
11131     avc_state->min_max_qp_enable = 0;
11132     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
11133
11134     //external input
11135     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11136     avc_state->ftq_skip_threshold_lut_input_enable = 0;
11137     avc_state->ftq_override = 0;
11138
11139     avc_state->direct_bias_adjustment_enable = 0;
11140     avc_state->global_motion_bias_adjustment_enable = 0;
11141     avc_state->disable_sub_mb_partion = 0;
11142     avc_state->arbitrary_num_mbs_in_slice = 0;
11143     avc_state->adaptive_transform_decision_enable = 0;//default
11144     avc_state->skip_check_disable = 0;
11145     avc_state->tq_enable = 0;
11146     avc_state->enable_avc_ildb = 0;
11147     avc_state->mbaff_flag = 0;
11148     avc_state->enable_force_skip = 1;//default
11149     avc_state->rc_panic_enable = 1;//default
11150     avc_state->suppress_recon_enable = 1;//default
11151
11152     avc_state->ref_pic_select_list_supported = 1;
11153     avc_state->mb_brc_supported = 1;//?,default
11154     avc_state->multi_pre_enable = 1;//default
11155     avc_state->ftq_enable = 1;//default
11156     avc_state->caf_supported = 1; //default
11157     avc_state->caf_enable = 0;
11158     avc_state->caf_disable_hd = 1;//default
11159     avc_state->skip_bias_adjustment_supported = 1;//default
11160
11161     avc_state->adaptive_intra_scaling_enable = 1;//default
11162     avc_state->old_mode_cost_enable = 0;//default
11163     avc_state->multi_ref_qp_enable = 1;//default
11164     avc_state->weighted_ref_l0_enable = 1;//default
11165     avc_state->weighted_ref_l1_enable = 1;//default
11166     avc_state->weighted_prediction_supported = 0;
11167     avc_state->brc_split_enable = 0;
11168     avc_state->slice_level_report_supported = 0;
11169
11170     avc_state->fbr_bypass_enable = 1;//default
11171     avc_state->field_scaling_output_interleaved = 0;
11172     avc_state->mb_variance_output_enable = 0;
11173     avc_state->mb_pixel_average_output_enable = 0;
11174     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11175     avc_state->mbenc_curbe_set_in_brc_update = 0;
11176     avc_state->rounding_inter_enable = 1; //default
11177     avc_state->adaptive_rounding_inter_enable = 1;//default
11178
11179     avc_state->mbenc_i_frame_dist_in_use = 0;
11180     avc_state->mb_status_supported = 1; //set in intialization for gen9
11181     avc_state->mb_status_enable = 0;
11182     avc_state->mb_vproc_stats_enable = 0;
11183     avc_state->flatness_check_enable = 0;
11184     avc_state->flatness_check_supported = 1;//default
11185     avc_state->block_based_skip_enable = 0;
11186     avc_state->use_widi_mbenc_kernel = 0;
11187     avc_state->kernel_trellis_enable = 0;
11188     avc_state->generic_reserved = 0;
11189
11190     avc_state->rounding_value = 0;
11191     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11192     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11193     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11194     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11195     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11196     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11197     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11198     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11199     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11200
11201     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11202     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11203     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11204
11205     avc_state->intra_refresh_qp_threshold = 0;
11206     avc_state->trellis_flag = 0;
11207     avc_state->hme_mv_cost_scaling_factor = 0;
11208     avc_state->slice_height = 1;
11209     avc_state->slice_num = 1;
11210     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11211     avc_state->bi_weight = 0;
11212
11213     avc_state->lambda_table_enable = 0;
11214
11215     if (IS_GEN8(i965->intel.device_info)) {
11216         avc_state->brc_const_data_surface_width = 64;
11217         avc_state->brc_const_data_surface_height = 44;
11218         avc_state->mb_status_supported = 0;
11219     } else if (IS_SKL(i965->intel.device_info) ||
11220                IS_BXT(i965->intel.device_info)) {
11221         avc_state->brc_const_data_surface_width = 64;
11222         avc_state->brc_const_data_surface_height = 44;
11223         avc_state->brc_split_enable = 1;
11224     } else if (IS_KBL(i965->intel.device_info) ||
11225                IS_GEN10(i965->intel.device_info) ||
11226                IS_GLK(i965->intel.device_info)) {
11227         avc_state->brc_const_data_surface_width = 64;
11228         avc_state->brc_const_data_surface_height = 53;
11229         //gen95
11230         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11231         avc_state->extended_mv_cost_range_enable = 0;
11232         avc_state->reserved_g95 = 0;
11233         avc_state->mbenc_brc_buffer_size = 128;
11234         avc_state->kernel_trellis_enable = 1;
11235         avc_state->lambda_table_enable = 1;
11236         avc_state->brc_split_enable = 1;
11237
11238         if (IS_GEN10(i965->intel.device_info))
11239             avc_state->adaptive_transform_decision_enable = 1;// CNL
11240     }
11241
11242     avc_state->num_refs[0] = 0;
11243     avc_state->num_refs[1] = 0;
11244     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11245     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11246     avc_state->tq_rounding = 0;
11247     avc_state->zero_mv_threshold = 0;
11248     avc_state->slice_second_levle_batch_buffer_in_use = 0;
11249
11250     //1. seq/pic/slice
11251
11252     /* the definition of status buffer offset for Encoder */
11253
11254     status_buffer = &avc_ctx->status_buffer;
11255     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
11256
11257     status_buffer->base_offset = base_offset;
11258     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11259     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11260     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11261     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11262     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11263     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
11264
11265     status_buffer->status_buffer_size = sizeof(struct encoder_status);
11266     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11267     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11268     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11269     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11270     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
11271
11272     if (IS_GEN8(i965->intel.device_info)) {
11273         gen8_avc_kernel_init(ctx, encoder_context);
11274     } else {
11275         gen9_avc_kernel_init(ctx, encoder_context);
11276     }
11277     encoder_context->vme_context = vme_context;
11278     /* Handling PreEnc operations separately since it gives better
11279      * code readability, avoid possible vme operations mess-up */
11280     encoder_context->vme_pipeline =
11281         !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11282     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
11283
11284     return true;
11285
11286 allocate_structure_failed:
11287
11288     free(vme_context);
11289     free(generic_ctx);
11290     free(avc_ctx);
11291     free(generic_state);
11292     free(avc_state);
11293     return false;
11294 }
11295
11296 Bool
11297 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11298 {
11299     /* VME & PAK share the same context */
11300     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11301
11302     if (!pak_context)
11303         return false;
11304
11305     encoder_context->mfc_context = pak_context;
11306     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11307     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11308     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11309     encoder_context->get_status = gen9_avc_get_coded_status;
11310     return true;
11311 }