OSDN Git Service

Remove implicit truncations from int to bit-field
[android-x86/hardware-intel-common-vaapi.git] / src / i965_avc_encoder.c
1 /*
2  * Copyright @ 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Pengfei Qu <Pengfei.qu@intel.com>
26  *    Sreerenj Balachandran <sreerenj.balachandran@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
47
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
54
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1
#define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
#define GPE_RESOURCE_ALIGNMENT 4  /* log2 of the alignment: 1 << 4 = 16 */

/* Emit the two address dwords for 'bo' into the BCS batch: a 64-bit
 * relocation when 'bo' is non-NULL (write domain set to RENDER when
 * 'is_target' is true, read-only otherwise), or two zero dwords when
 * there is no buffer. 'delta' is the byte offset into 'bo'. */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta)  do {               \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                        \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_RENDER : 0,     \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/* Same as OUT_BUFFER_2DW plus a third dword carrying 'attr' (the memory
 * attribute / control-state dword that follows the address pair). */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr)  do { \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)

/* FEI specific buffer sizes per MB in bytes for gen9 */
#define FEI_AVC_MB_CODE_BUFFER_SIZE      64
#define FEI_AVC_MV_DATA_BUFFER_SIZE      128
#define FEI_AVC_MB_CONTROL_BUFFER_SIZE   16
#define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
#define FEI_AVC_DISTORTION_BUFFER_SIZE   48
#define FEI_AVC_QP_BUFFER_SIZE           1
#define PREENC_AVC_STATISTICS_BUFFER_SIZE 64

/* Selector values identifying which picture a scaling pass operates on. */
#define SCALE_CUR_PIC        1
#define SCALE_PAST_REF_PIC   2
#define SCALE_FUTURE_REF_PIC 3
90
/* Flat quantisation matrix: 16 dwords with every byte set to 0x10 (16).
 * NOTE(review): presumably uploaded as the AVC QM (scaling list) state —
 * the consumer is outside this chunk; confirm at the call sites. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
97
/* Flat forward-quantisation matrix: 32 dwords, each packing two 16-bit
 * values of 0x1000. NOTE(review): presumably the FQM counterpart of
 * qm_flat above — confirm against the (out-of-chunk) consumer. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
108
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
110
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
112     // unsigned int 0
113     {
114         0
115     },
116
117     // unsigned int 1
118     {
119         0
120     },
121
122     // unsigned int 2
123     {
124         0
125     },
126
127     // unsigned int 3
128     {
129         0
130     },
131
132     // unsigned int 4
133     {
134         0
135     },
136
137     // unsigned int 5
138     {
139         0
140     },
141
142     // unsigned int 6
143     {
144         0
145     },
146
147     // unsigned int 7
148     {
149         0
150     },
151
152     // unsigned int 8
153     {
154         0,
155         0
156     },
157
158     // unsigned int 9
159     {
160         0,
161         0
162     },
163
164     // unsigned int 10
165     {
166         0,
167         0
168     },
169
170     // unsigned int 11
171     {
172         0,
173         1
174     },
175
176     // unsigned int 12
177     {
178         51,
179         0
180     },
181
182     // unsigned int 13
183     {
184         40,
185         60,
186         80,
187         120
188     },
189
190     // unsigned int 14
191     {
192         35,
193         60,
194         80,
195         120
196     },
197
198     // unsigned int 15
199     {
200         40,
201         60,
202         90,
203         115
204     },
205
206     // unsigned int 16
207     {
208         0,
209         0,
210         0,
211         0
212     },
213
214     // unsigned int 17
215     {
216         0,
217         0,
218         0,
219         0
220     },
221
222     // unsigned int 18
223     {
224         0,
225         0,
226         0,
227         0
228     },
229
230     // unsigned int 19
231     {
232         0,
233         0,
234         0,
235         0
236     },
237
238     // unsigned int 20
239     {
240         0,
241         0,
242         0,
243         0
244     },
245
246     // unsigned int 21
247     {
248         0,
249         0,
250         0,
251         0
252     },
253
254     // unsigned int 22
255     {
256         0,
257         0,
258         0,
259         0
260     },
261
262     // unsigned int 23
263     {
264         0
265     }
266 };
267
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
269     //unsigned int 0
270     {
271         0
272     },
273
274     //unsigned int 1
275     {
276         0
277     },
278
279     //unsigned int 2
280     {
281         0
282     },
283
284     //unsigned int 3
285     {
286
287         10,
288         50
289
290     },
291
292     //unsigned int 4
293     {
294
295         100,
296         150
297
298     },
299
300     //unsigned int 5
301     {
302         0, 0, 0, 0
303     },
304
305     //unsigned int 6
306     {
307         0, 0, 0, 0
308     },
309
310     //unsigned int 7
311     {
312         0
313     },
314
315     //unsigned int 8
316     {
317
318         1,
319         1,
320         3,
321         2
322
323     },
324
325     //unsigned int 9
326     {
327
328         1,
329         40,
330         5,
331         5
332
333     },
334
335     //unsigned int 10
336     {
337
338         3,
339         1,
340         7,
341         18
342
343     },
344
345     //unsigned int 11
346     {
347
348         25,
349         37,
350         40,
351         75
352
353     },
354
355     //unsigned int 12
356     {
357
358         97,
359         103,
360         125,
361         160
362
363     },
364
365     //unsigned int 13
366     {
367
368         -3,
369         -2,
370         -1,
371         0
372
373     },
374
375     //unsigned int 14
376     {
377
378         1,
379         2,
380         3,
381         0xff
382
383     },
384
385     //unsigned int 15
386     {
387         0, 0
388     },
389
390     //unsigned int 16
391     {
392         0, 0
393     },
394
395     //unsigned int 17
396     {
397         0, 0
398     },
399 };
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
401     // unsigned int 0
402     {
403         0
404     },
405
406     // unsigned int 1
407     {
408         0
409     },
410
411     // unsigned int 2
412     {
413         0
414     },
415
416     // unsigned int 3
417     {
418         10,
419         50
420     },
421
422     // unsigned int 4
423     {
424         100,
425         150
426     },
427
428     // unsigned int 5
429     {
430         0,
431         0,
432         0,
433         0
434     },
435
436     // unsigned int 6
437     {
438         0,
439         0,
440         0,
441         0,
442         0,
443         0
444     },
445
446     // unsigned int 7
447     {
448         0
449     },
450
451     // unsigned int 8
452     {
453         1,
454         1,
455         3,
456         2
457     },
458
459     // unsigned int 9
460     {
461         1,
462         40,
463         5,
464         5
465     },
466
467     // unsigned int 10
468     {
469         3,
470         1,
471         7,
472         18
473     },
474
475     // unsigned int 11
476     {
477         25,
478         37,
479         40,
480         75
481     },
482
483     // unsigned int 12
484     {
485         97,
486         103,
487         125,
488         160
489     },
490
491     // unsigned int 13
492     {
493         -3,
494         -2,
495         -1,
496         0
497     },
498
499     // unsigned int 14
500     {
501         1,
502         2,
503         3,
504         0xff
505     },
506
507     // unsigned int 15
508     {
509         0,
510         0,
511         0,
512         0
513     },
514
515     // unsigned int 16
516     {
517         0
518     },
519
520     // unsigned int 17
521     {
522         0
523     },
524
525     // unsigned int 18
526     {
527         0
528     },
529
530     // unsigned int 19
531     {
532         0
533     },
534
535     // unsigned int 20
536     {
537         0
538     },
539
540     // unsigned int 21
541     {
542         0
543     },
544
545     // unsigned int 22
546     {
547         0
548     },
549
550     // unsigned int 23
551     {
552         0
553     },
554
555 };
556
557 static void
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559                                 struct encode_state *encode_state,
560                                 struct intel_encoder_context *encoder_context)
561 {
562     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
564     int i;
565
566     /* brc */
567     generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
568
569     generic_state->brc_need_reset = encoder_context->brc.need_reset;
570
571     if (generic_state->internal_rate_mode == VA_RC_CBR) {
572         generic_state->min_bit_rate = generic_state->max_bit_rate;
573         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
574
575         if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576             generic_state->target_bit_rate = generic_state->max_bit_rate;
577             generic_state->brc_need_reset = 1;
578         }
579     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
580         generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581         generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
582
583         if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584             generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585             generic_state->brc_need_reset = 1;
586         }
587     }
588
589     /*  frame rate */
590     if (generic_state->internal_rate_mode != VA_RC_CQP) {
591         generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592         generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593         generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
594     } else {
595         generic_state->frames_per_100s = 30 * 100;
596         generic_state->frame_rate = 30 ;
597         generic_state->frames_per_window_size = 30;
598     }
599
600     /*  HRD */
601     if (generic_state->internal_rate_mode != VA_RC_CQP) {
602         generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603         generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
604     }
605
606     /* ROI */
607     generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608     if (generic_state->num_roi > 0) {
609         generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610         generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
611
612         for (i = 0; i < generic_state->num_roi; i++) {
613             generic_state->roi[i].left   = encoder_context->brc.roi[i].left;
614             generic_state->roi[i].right  = encoder_context->brc.roi[i].right;
615             generic_state->roi[i].top    = encoder_context->brc.roi[i].top;
616             generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617             generic_state->roi[i].value  = encoder_context->brc.roi[i].value;
618
619             generic_state->roi[i].left /= 16;
620             generic_state->roi[i].right /= 16;
621             generic_state->roi[i].top /= 16;
622             generic_state->roi[i].bottom /= 16;
623         }
624     }
625
626 }
627
628 static bool
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
630                                      int binary_size,
631                                      INTEL_GENERIC_ENC_OPERATION operation,
632                                      int krnstate_idx,
633                                      struct i965_kernel *ret_kernel)
634 {
635     typedef uint32_t BIN_PTR[4];
636
637     char *bin_start;
638     gen9_avc_encoder_kernel_header      *pkh_table;
639     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
640     int next_krnoffset;
641
642     if (!pvbinary || !ret_kernel)
643         return false;
644
645     bin_start = (char *)pvbinary;
646     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
647     pinvalid_entry = &(pkh_table->static_detection) + 1;
648     next_krnoffset = binary_size;
649
650     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651         pcurr_header = &pkh_table->ply_dscale_ply;
652     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653         pcurr_header = &pkh_table->ply_2xdscale_ply;
654     } else if (operation == INTEL_GENERIC_ENC_ME) {
655         pcurr_header = &pkh_table->me_p;
656     } else if (operation == INTEL_GENERIC_ENC_BRC) {
657         pcurr_header = &pkh_table->frame_brc_init;
658     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659         pcurr_header = &pkh_table->mbenc_quality_I;
660     } else if (operation == INTEL_GENERIC_ENC_WP) {
661         pcurr_header = &pkh_table->wp;
662     } else if (operation == INTEL_GENERIC_ENC_SFD) {
663         pcurr_header = &pkh_table->static_detection;
664     } else {
665         return false;
666     }
667
668     pcurr_header += krnstate_idx;
669     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
670
671     pnext_header = (pcurr_header + 1);
672     if (pnext_header < pinvalid_entry) {
673         next_krnoffset = pnext_header->kernel_start_pointer << 6;
674     }
675     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
676
677     return true;
678 }
679
680 static bool
681 intel_avc_fei_get_kernel_header_and_size(
682     void                             *pvbinary,
683     int                              binary_size,
684     INTEL_GENERIC_ENC_OPERATION      operation,
685     int                              krnstate_idx,
686     struct i965_kernel               *ret_kernel)
687 {
688     typedef uint32_t BIN_PTR[4];
689
690     char *bin_start;
691     gen9_avc_fei_encoder_kernel_header      *pkh_table;
692     kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
693     int next_krnoffset;
694
695     if (!pvbinary || !ret_kernel)
696         return false;
697
698     bin_start = (char *)pvbinary;
699     pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
700     pinvalid_entry = &(pkh_table->wp) + 1;
701     next_krnoffset = binary_size;
702
703     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704         pcurr_header = &pkh_table->ply_dscale_ply;
705     } else if (operation == INTEL_GENERIC_ENC_ME) {
706         pcurr_header = &pkh_table->me_p;
707     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708         pcurr_header = &pkh_table->mbenc_i;
709     } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710         pcurr_header =  &pkh_table->preproc;
711     } else {
712         return false;
713     }
714
715     pcurr_header += krnstate_idx;
716     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
717
718     pnext_header = (pcurr_header + 1);
719     if (pnext_header < pinvalid_entry) {
720         next_krnoffset = pnext_header->kernel_start_pointer << 6;
721     }
722     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
723
724     return true;
725 }
726
727 static void
728 gen9_free_surfaces_avc(void **data)
729 {
730     struct gen9_surface_avc *avc_surface;
731
732     if (!data || !*data)
733         return;
734
735     avc_surface = *data;
736
737     if (avc_surface->scaled_4x_surface_obj) {
738         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740         avc_surface->scaled_4x_surface_obj = NULL;
741     }
742
743     if (avc_surface->scaled_16x_surface_obj) {
744         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746         avc_surface->scaled_16x_surface_obj = NULL;
747     }
748
749     if (avc_surface->scaled_32x_surface_obj) {
750         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752         avc_surface->scaled_32x_surface_obj = NULL;
753     }
754
755     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
758
759     /* FEI specific resources */
760     /* since the driver previously taken an extra reference to the drm_bo
761      * in case the buffers were supplied by middleware, there shouldn't
762      * be any memory handling issue */
763     i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764     i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765     i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766     i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
767
768     dri_bo_unreference(avc_surface->dmv_top);
769     avc_surface->dmv_top = NULL;
770     dri_bo_unreference(avc_surface->dmv_bottom);
771     avc_surface->dmv_bottom = NULL;
772
773     free(avc_surface);
774
775     *data = NULL;
776
777     return;
778 }
779
/* Lazily attach per-surface AVC encoder state to obj_surface: creates the
 * 4x and 16x (and, when 32x ME is supported/enabled, 32x) downscaled NV12
 * surfaces for HME, the MB-code/MV-data GPE buffers (non-FEI only), the
 * optional ref-pic-select surface, and the two direct-MV bos.
 * Returns VA_STATUS_SUCCESS immediately if private data already exists.
 * On failure the partially built state stays attached to obj_surface and
 * is released through gen9_free_surfaces_avc (set below), so no explicit
 * unwinding is done here. */
static VAStatus
gen9_avc_init_check_surfaces(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             struct intel_encoder_context *encoder_context,
                             struct avc_surface_param *surface_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;

    struct gen9_surface_avc *avc_surface;
    int downscaled_width_4x, downscaled_height_4x;
    int downscaled_width_16x, downscaled_height_16x;
    int downscaled_width_32x, downscaled_height_32x;
    int size = 0;
    /* Frame dimensions rounded up to whole 16x16 macroblocks. */
    unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
    unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
    unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
    int allocate_flag = 1;
    int width, height;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* Already initialized on a previous frame. */
    if (obj_surface->private_data) {
        return VA_STATUS_SUCCESS;
    }

    avc_surface = calloc(1, sizeof(struct gen9_surface_avc));

    if (!avc_surface)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Install the destructor first so any failure below is cleaned up
     * when the surface's private data is eventually freed. */
    avc_surface->ctx = ctx;
    obj_surface->private_data = avc_surface;
    obj_surface->free_private_data = gen9_free_surfaces_avc;

    /* 4x downscaled surface for HME. */
    downscaled_width_4x = generic_state->frame_width_4x;
    downscaled_height_4x = generic_state->frame_height_4x;

    i965_CreateSurfaces(ctx,
                        downscaled_width_4x,
                        downscaled_height_4x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_4x_surface_id);

    avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);

    if (!avc_surface->scaled_4x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 16x downscaled surface. */
    downscaled_width_16x = generic_state->frame_width_16x;
    downscaled_height_16x = generic_state->frame_height_16x;
    i965_CreateSurfaces(ctx,
                        downscaled_width_16x,
                        downscaled_height_16x,
                        VA_RT_FORMAT_YUV420,
                        1,
                        &avc_surface->scaled_16x_surface_id);
    avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);

    if (!avc_surface->scaled_16x_surface_obj) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
                                VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* 32x downscaled surface, only when 32x ME is available. */
    if (generic_state->b32xme_supported ||
        generic_state->b32xme_enabled) {
        downscaled_width_32x = generic_state->frame_width_32x;
        downscaled_height_32x = generic_state->frame_height_32x;
        i965_CreateSurfaces(ctx,
                            downscaled_width_32x,
                            downscaled_height_32x,
                            VA_RT_FORMAT_YUV420,
                            1,
                            &avc_surface->scaled_32x_surface_id);
        avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);

        if (!avc_surface->scaled_32x_surface_obj) {
            return VA_STATUS_ERROR_ALLOCATION_FAILED;
        }

        i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    }

    /* MB code and MV data buffers for each frame (FEI supplies its own,
     * see the FEI buffer-size defines above). */
    if (!encoder_context->fei_enabled) {
        size = frame_mb_nums * 16 * 4;
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mb_code_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mb code buffer");
        if (!allocate_flag)
            goto failed_allocation;

        size = frame_mb_nums * 32 * 4;
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &avc_surface->res_mv_data_surface,
                                                   ALIGN(size, 0x1000),
                                                   "mv data buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /* Reference picture select list (one 8-byte entry per MB column,
     * row pitch aligned to 64 bytes). */
    if (avc_state->ref_pic_select_list_supported) {
        width = ALIGN(frame_width_in_mbs * 8, 64);
        height = frame_height_in_mbs ;
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &avc_surface->res_ref_pic_select_surface,
                                                      width, height,
                                                      width,
                                                      "Ref pic select list buffer");
        if (!allocate_flag)
            goto failed_allocation;
    }

    /* Direct-MV buffers: 68 bytes per MB for top/bottom fields. */
    avc_surface->dmv_top =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv top Buffer",
                     68 * frame_mb_nums,
                     64);
    avc_surface->dmv_bottom =
        dri_bo_alloc(i965->intel.bufmgr,
                     "direct mv bottom Buffer",
                     68 * frame_mb_nums,
                     64);
    assert(avc_surface->dmv_top);
    assert(avc_surface->dmv_bottom);

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
925
/* Populate the MBENC slice-map surface: one dword per macroblock holding
 * the index of the slice that MB belongs to, laid out row by row with a
 * 64-byte-aligned row pitch, and followed by a 0xFFFFFFFF terminator.
 * Only needed when slices contain arbitrary numbers of MBs; otherwise
 * this is a no-op. */
static void
gen9_avc_generate_slice_map(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    VAEncSliceParameterBufferH264 * slice_param = NULL;
    unsigned int * data = NULL;      /* write cursor within the current row */
    unsigned int * data_row = NULL;  /* start of the current row */
    int i, j, count = 0;             /* count = MBs written so far */
    /* Row pitch in dwords: (width+1) dwords, rounded up to 64 bytes. */
    unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;

    if (!avc_state->arbitrary_num_mbs_in_slice)
        return;

    gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
    assert(gpe_resource);

    i965_zero_gpe_resource(gpe_resource);

    data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
    assert(data_row);

    data = data_row;
    for (i = 0; i < avc_state->slice_num; i++) {
        slice_param = avc_state->slice_param[i];
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            *data++ = i;
            /* At each frame-width boundary, jump to the next padded row
             * and re-emit the current slice index as its first entry. */
            if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
                data_row += pitch;
                data = data_row;
                *data++ = i;
            }
            count++;
        }
    }
    /* End-of-map marker consumed by the MBENC kernel. */
    *data++ = 0xFFFFFFFF;

    i965_unmap_gpe_resource(gpe_resource);
}
971
972 static VAStatus
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974                             struct encode_state *encode_state,
975                             struct intel_encoder_context *encoder_context)
976 {
977     struct i965_driver_data *i965 = i965_driver_data(ctx);
978     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982     unsigned int size  = 0;
983     unsigned int width  = 0;
984     unsigned int height  = 0;
985     unsigned char * data  = NULL;
986     int allocate_flag = 1;
987     int i = 0;
988
989     /*all the surface/buffer are allocated here*/
990
991     /*second level batch buffer for image state write when cqp etc*/
992     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
996                                                ALIGN(size, 0x1000),
997                                                "second levle batch (image state write) buffer");
998     if (!allocate_flag)
999         goto failed_allocation;
1000
1001     /* scaling related surface   */
1002     if (avc_state->mb_status_supported) {
1003         i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004         size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006                                                    &avc_ctx->res_mb_status_buffer,
1007                                                    ALIGN(size, 0x1000),
1008                                                    "MB statistics output buffer");
1009         if (!allocate_flag)
1010             goto failed_allocation;
1011         i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1012     }
1013
1014     if (avc_state->flatness_check_supported) {
1015         width = generic_state->frame_width_in_mbs * 4;
1016         height = generic_state->frame_height_in_mbs * 4;
1017         i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019                                                       &avc_ctx->res_flatness_check_surface,
1020                                                       width, height,
1021                                                       ALIGN(width, 64),
1022                                                       "Flatness check buffer");
1023         if (!allocate_flag)
1024             goto failed_allocation;
1025     }
1026     /* me related surface */
1027     width = generic_state->downscaled_width_4x_in_mb * 8;
1028     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031                                                   &avc_ctx->s4x_memv_distortion_buffer,
1032                                                   width, height,
1033                                                   ALIGN(width, 64),
1034                                                   "4x MEMV distortion buffer");
1035     if (!allocate_flag)
1036         goto failed_allocation;
1037     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1038
1039     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043                                                   &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1044                                                   width, height,
1045                                                   width,
1046                                                   "4x MEMV min distortion brc buffer");
1047     if (!allocate_flag)
1048         goto failed_allocation;
1049     i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1050
1051
1052     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056                                                   &avc_ctx->s4x_memv_data_buffer,
1057                                                   width, height,
1058                                                   width,
1059                                                   "4x MEMV data buffer");
1060     if (!allocate_flag)
1061         goto failed_allocation;
1062     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1063
1064
1065     width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066     height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069                                                   &avc_ctx->s16x_memv_data_buffer,
1070                                                   width, height,
1071                                                   width,
1072                                                   "16x MEMV data buffer");
1073     if (!allocate_flag)
1074         goto failed_allocation;
1075     i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1076
1077
1078     width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079     height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                                   &avc_ctx->s32x_memv_data_buffer,
1083                                                   width, height,
1084                                                   width,
1085                                                   "32x MEMV data buffer");
1086     if (!allocate_flag)
1087         goto failed_allocation;
1088     i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1089
1090
1091     if (!generic_state->brc_allocated) {
1092         /*brc related surface */
1093         i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1094         size = 864;
1095         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096                                                    &avc_ctx->res_brc_history_buffer,
1097                                                    ALIGN(size, 0x1000),
1098                                                    "brc history buffer");
1099         if (!allocate_flag)
1100             goto failed_allocation;
1101
1102         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1103         size = 64;//44
1104         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106                                                    ALIGN(size, 0x1000),
1107                                                    "brc pak statistic buffer");
1108         if (!allocate_flag)
1109             goto failed_allocation;
1110
1111         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114                                                    &avc_ctx->res_brc_image_state_read_buffer,
1115                                                    ALIGN(size, 0x1000),
1116                                                    "brc image state read buffer");
1117         if (!allocate_flag)
1118             goto failed_allocation;
1119
1120         i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121         size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123                                                    &avc_ctx->res_brc_image_state_write_buffer,
1124                                                    ALIGN(size, 0x1000),
1125                                                    "brc image state write buffer");
1126         if (!allocate_flag)
1127             goto failed_allocation;
1128
1129         width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130         height = avc_state->brc_const_data_surface_height;
1131         i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133                                                       &avc_ctx->res_brc_const_data_buffer,
1134                                                       width, height,
1135                                                       width,
1136                                                       "brc const data buffer");
1137         if (!allocate_flag)
1138             goto failed_allocation;
1139         i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140
1141         if (generic_state->brc_distortion_buffer_supported) {
1142             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144             width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145             height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146             i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                           &avc_ctx->res_brc_dist_data_surface,
1149                                                           width, height,
1150                                                           width,
1151                                                           "brc dist data buffer");
1152             if (!allocate_flag)
1153                 goto failed_allocation;
1154             i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1155         }
1156
1157         if (generic_state->brc_roi_enable) {
1158             width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159             height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160             i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161             allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162                                                           &avc_ctx->res_mbbrc_roi_surface,
1163                                                           width, height,
1164                                                           width,
1165                                                           "mbbrc roi buffer");
1166             if (!allocate_flag)
1167                 goto failed_allocation;
1168             i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1169         }
1170
1171         /*mb qp in mb brc*/
1172         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174         i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176                                                       &avc_ctx->res_mbbrc_mb_qp_data_surface,
1177                                                       width, height,
1178                                                       width,
1179                                                       "mbbrc mb qp buffer");
1180         if (!allocate_flag)
1181             goto failed_allocation;
1182
1183         i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184         size = 16 * AVC_QP_MAX * 4;
1185         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186                                                    &avc_ctx->res_mbbrc_const_data_buffer,
1187                                                    ALIGN(size, 0x1000),
1188                                                    "mbbrc const data buffer");
1189         if (!allocate_flag)
1190             goto failed_allocation;
1191
1192         if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193             i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194             size = avc_state->mbenc_brc_buffer_size;
1195             allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196                                                        &avc_ctx->res_mbenc_brc_buffer,
1197                                                        ALIGN(size, 0x1000),
1198                                                        "mbenc brc buffer");
1199             if (!allocate_flag)
1200                 goto failed_allocation;
1201             i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1202         }
1203         generic_state->brc_allocated = 1;
1204     }
1205
1206     /*mb qp external*/
1207     if (avc_state->mb_qp_data_enable) {
1208         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210         i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212                                                       &avc_ctx->res_mb_qp_data_surface,
1213                                                       width, height,
1214                                                       width,
1215                                                       "external mb qp buffer");
1216         if (!allocate_flag)
1217             goto failed_allocation;
1218     }
1219
1220     /*     mbenc related surface. it share most of surface with other kernels     */
1221     if (avc_state->arbitrary_num_mbs_in_slice) {
1222         width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223         height = generic_state->frame_height_in_mbs ;
1224         i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226                                                       &avc_ctx->res_mbenc_slice_map_surface,
1227                                                       width, height,
1228                                                       width,
1229                                                       "slice map buffer");
1230         if (!allocate_flag)
1231             goto failed_allocation;
1232         i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1233
1234         /*generate slice map,default one slice per frame.*/
1235     }
1236
1237     /* sfd related surface  */
1238     if (avc_state->sfd_enable) {
1239         i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1240         size = 128;
1241         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242                                                    &avc_ctx->res_sfd_output_buffer,
1243                                                    size,
1244                                                    "sfd output buffer");
1245         if (!allocate_flag)
1246             goto failed_allocation;
1247         i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1248
1249         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250         size = ALIGN(52, 64);
1251         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252                                                    &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1253                                                    size,
1254                                                    "sfd P frame cost table buffer");
1255         if (!allocate_flag)
1256             goto failed_allocation;
1257         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1258         assert(data);
1259         memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1261
1262         i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263         size = ALIGN(52, 64);
1264         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265                                                    &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1266                                                    size,
1267                                                    "sfd B frame cost table buffer");
1268         if (!allocate_flag)
1269             goto failed_allocation;
1270         data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1271         assert(data);
1272         memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273         i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1274     }
1275
1276     /* wp related surfaces */
1277     if (avc_state->weighted_prediction_supported) {
1278         for (i = 0; i < 2 ; i++) {
1279             if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1280                 continue;
1281             }
1282
1283             width = generic_state->frame_width_in_pixel;
1284             height = generic_state->frame_height_in_pixel ;
1285             i965_CreateSurfaces(ctx,
1286                                 width,
1287                                 height,
1288                                 VA_RT_FORMAT_YUV420,
1289                                 1,
1290                                 &avc_ctx->wp_output_pic_select_surface_id[i]);
1291             avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1292
1293             if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294                 goto failed_allocation;
1295             }
1296
1297             i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298                                         VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1299         }
1300         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302         i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303         i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1304     }
1305
1306     /* other   */
1307
1308     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1309     size = 4 * 1;
1310     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311                                                &avc_ctx->res_mad_data_buffer,
1312                                                ALIGN(size, 0x1000),
1313                                                "MAD data buffer");
1314     if (!allocate_flag)
1315         goto failed_allocation;
1316
1317     return VA_STATUS_SUCCESS;
1318
1319 failed_allocation:
1320     return VA_STATUS_ERROR_ALLOCATION_FAILED;
1321 }
1322
1323 static void
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1325 {
1326     if (!vme_context)
1327         return;
1328
1329     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330     VADriverContextP ctx = avc_ctx->ctx;
1331     int i = 0;
1332
1333     /* free all the surface/buffer here*/
1334     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1360
1361     for (i = 0; i < 2 ; i++) {
1362         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1366         }
1367     }
1368
1369     /* free preenc resources */
1370     i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371     i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372     i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373     i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1374
1375     i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376     i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1377
1378     i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379     avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380     avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1381
1382     i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383     avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384     avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1385
1386     i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387     avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388     avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1389 }
1390
1391 static void
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393                                  struct intel_encoder_context *encoder_context,
1394                                  struct i965_gpe_context *gpe_context,
1395                                  int media_function,
1396                                  struct gpe_media_object_parameter *param)
1397 {
1398     struct i965_driver_data *i965 = i965_driver_data(ctx);
1399     struct i965_gpe_table *gpe = &i965->gpe_table;
1400     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1402
1403     struct intel_batchbuffer *batch = encoder_context->base.batch;
1404     struct encoder_status_buffer_internal *status_buffer;
1405     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1406
1407     if (!batch)
1408         return;
1409
1410     intel_batchbuffer_start_atomic(batch, 0x1000);
1411     intel_batchbuffer_emit_mi_flush(batch);
1412
1413     status_buffer = &(avc_ctx->status_buffer);
1414     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415     mi_store_data_imm.bo = status_buffer->bo;
1416     mi_store_data_imm.offset = status_buffer->media_index_offset;
1417     mi_store_data_imm.dw0 = media_function;
1418     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1419
1420     gpe->pipeline_setup(ctx, gpe_context, batch);
1421     gpe->media_object(ctx, gpe_context, batch, param);
1422     gpe->media_state_flush(ctx, gpe_context, batch);
1423
1424     gpe->pipeline_end(ctx, gpe_context, batch);
1425
1426     intel_batchbuffer_end_atomic(batch);
1427
1428     intel_batchbuffer_flush(batch);
1429 }
1430
1431 static void
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433                                         struct intel_encoder_context *encoder_context,
1434                                         struct i965_gpe_context *gpe_context,
1435                                         int media_function,
1436                                         struct gpe_media_object_walker_parameter *param)
1437 {
1438     struct i965_driver_data *i965 = i965_driver_data(ctx);
1439     struct i965_gpe_table *gpe = &i965->gpe_table;
1440     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1442
1443     struct intel_batchbuffer *batch = encoder_context->base.batch;
1444     struct encoder_status_buffer_internal *status_buffer;
1445     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1446
1447     if (!batch)
1448         return;
1449
1450     intel_batchbuffer_start_atomic(batch, 0x1000);
1451
1452     intel_batchbuffer_emit_mi_flush(batch);
1453
1454     status_buffer = &(avc_ctx->status_buffer);
1455     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456     mi_store_data_imm.bo = status_buffer->bo;
1457     mi_store_data_imm.offset = status_buffer->media_index_offset;
1458     mi_store_data_imm.dw0 = media_function;
1459     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1460
1461     gpe->pipeline_setup(ctx, gpe_context, batch);
1462     gpe->media_object_walker(ctx, gpe_context, batch, param);
1463     gpe->media_state_flush(ctx, gpe_context, batch);
1464
1465     gpe->pipeline_end(ctx, gpe_context, batch);
1466
1467     intel_batchbuffer_end_atomic(batch);
1468
1469     intel_batchbuffer_flush(batch);
1470 }
1471
1472 static void
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474                           struct i965_gpe_context *gpe_context,
1475                           struct encoder_kernel_parameter *kernel_param)
1476 {
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478
1479     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1480
1481     gpe_context->sampler.entry_size = 0;
1482     gpe_context->sampler.max_entries = 0;
1483
1484     if (kernel_param->sampler_size) {
1485         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486         gpe_context->sampler.max_entries = 1;
1487     }
1488
1489     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1491
1492     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1496
1497     if (i965->intel.eu_total > 0)
1498         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1499     else
1500         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1501
1502     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505                                               gpe_context->vfe_state.curbe_allocation_size -
1506                                               ((gpe_context->idrt.entry_size >> 5) *
1507                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509     gpe_context->vfe_state.gpgpu_mode = 0;
1510 }
1511
1512 static void
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514                              struct encoder_scoreboard_parameter *scoreboard_param)
1515 {
1516     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1519
1520     if (scoreboard_param->walkpat_flag) {
1521         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522         gpe_context->vfe_desc5.scoreboard0.type = 1;
1523
1524         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
1525         gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
1526
1527         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1528         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
1529
1530         gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
1531         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
1532
1533         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1534         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
1535     } else {
1536         // Scoreboard 0
1537         gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
1538         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
1539
1540         // Scoreboard 1
1541         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1542         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
1543
1544         // Scoreboard 2
1545         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
1546         gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
1547
1548         // Scoreboard 3
1549         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1550         gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
1551
1552         // Scoreboard 4
1553         gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
1554         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
1555
1556         // Scoreboard 5
1557         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
1558         gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
1559
1560         // Scoreboard 6
1561         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
1562         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1563
1564         // Scoreboard 7
1565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
1566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1567     }
1568 }
1569 /*
1570 VME pipeline related function
1571 */
1572
1573 /*
1574 scaling kernel related function
1575 */
1576 static void
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578                              struct encode_state *encode_state,
1579                              struct i965_gpe_context *gpe_context,
1580                              struct intel_encoder_context *encoder_context,
1581                              void *param)
1582 {
1583     gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584     struct scaling_param *surface_param = (struct scaling_param *)param;
1585
1586     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1587
1588     if (!curbe_cmd)
1589         return;
1590
1591     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1592
1593     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1594     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1595
1596     curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597     curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1598
1599
1600     curbe_cmd->dw5.flatness_threshold = 128;
1601     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602     curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603     curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1604
1605     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606         curbe_cmd->dw7.enable_mb_variance_output ||
1607         curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608         curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1609     }
1610
1611     i965_gpe_context_unmap_curbe(gpe_context);
1612     return;
1613 }
1614
1615 static void
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617                               struct encode_state *encode_state,
1618                               struct i965_gpe_context *gpe_context,
1619                               struct intel_encoder_context *encoder_context,
1620                               void *param)
1621 {
1622     gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623     struct scaling_param *surface_param = (struct scaling_param *)param;
1624
1625     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1626
1627     if (!curbe_cmd)
1628         return;
1629
1630     memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1631
1632     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1633     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1634
1635     curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636     curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1637
1638     if (surface_param->enable_mb_flatness_check)
1639         curbe_cmd->dw5.flatness_threshold = 128;
1640     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643     curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1644
1645     if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646         curbe_cmd->dw6.enable_mb_variance_output ||
1647         curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648         curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1649     }
1650
1651     i965_gpe_context_unmap_curbe(gpe_context);
1652     return;
1653 }
1654
1655 static void
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657                              struct encode_state *encode_state,
1658                              struct i965_gpe_context *gpe_context,
1659                              struct intel_encoder_context *encoder_context,
1660                              void *param)
1661 {
1662     gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663     struct scaling_param *surface_param = (struct scaling_param *)param;
1664
1665     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1666
1667     if (!curbe_cmd)
1668         return;
1669
1670     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1671
1672     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
1673     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1674
1675     curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676     curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1677
1678     i965_gpe_context_unmap_curbe(gpe_context);
1679     return;
1680 }
1681
1682 static void
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684                               struct encode_state *encode_state,
1685                               struct i965_gpe_context *gpe_context,
1686                               struct intel_encoder_context *encoder_context,
1687                               void *param)
1688 {
1689     struct scaling_param *surface_param = (struct scaling_param *)param;
1690     struct i965_driver_data *i965 = i965_driver_data(ctx);
1691     unsigned int surface_format;
1692     unsigned int res_size;
1693
1694     if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695         surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696     else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697         surface_format = I965_SURFACEFORMAT_R16_UNORM;
1698     else
1699         surface_format = I965_SURFACEFORMAT_R8_UNORM;
1700
1701     i965_add_2d_gpe_surface(ctx, gpe_context,
1702                             surface_param->input_surface,
1703                             0, 1, surface_format,
1704                             GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1705
1706     i965_add_2d_gpe_surface(ctx, gpe_context,
1707                             surface_param->output_surface,
1708                             0, 1, surface_format,
1709                             GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1710
1711     /*add buffer mv_proc_stat, here need change*/
1712     if (IS_GEN8(i965->intel.device_info)) {
1713         if (surface_param->mbv_proc_stat_enabled) {
1714             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1715
1716             i965_add_buffer_gpe_surface(ctx,
1717                                         gpe_context,
1718                                         surface_param->pres_mbv_proc_stat_buffer,
1719                                         0,
1720                                         res_size / 4,
1721                                         0,
1722                                         GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1723         }
1724         if (surface_param->enable_mb_flatness_check) {
1725             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726                                            surface_param->pres_flatness_check_surface,
1727                                            1,
1728                                            I965_SURFACEFORMAT_R8_UNORM,
1729                                            GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1730         }
1731     } else {
1732         if (surface_param->mbv_proc_stat_enabled) {
1733             res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1734
1735             i965_add_buffer_gpe_surface(ctx,
1736                                         gpe_context,
1737                                         surface_param->pres_mbv_proc_stat_buffer,
1738                                         0,
1739                                         res_size / 4,
1740                                         0,
1741                                         GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742         } else if (surface_param->enable_mb_flatness_check) {
1743             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744                                            surface_param->pres_flatness_check_surface,
1745                                            1,
1746                                            I965_SURFACEFORMAT_R8_UNORM,
1747                                            GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1748         }
1749     }
1750     return;
1751 }
1752
1753 static VAStatus
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755                         struct encode_state *encode_state,
1756                         struct intel_encoder_context *encoder_context,
1757                         int hme_type)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct i965_gpe_table *gpe = &i965->gpe_table;
1761     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1766
1767     struct i965_gpe_context *gpe_context;
1768     struct scaling_param surface_param;
1769     struct object_surface *obj_surface;
1770     struct gen9_surface_avc *avc_priv_surface;
1771     struct gpe_media_object_walker_parameter media_object_walker_param;
1772     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774     int media_function = 0;
1775     int kernel_idx = 0;
1776
1777     obj_surface = encode_state->reconstructed_object;
1778     avc_priv_surface = obj_surface->private_data;
1779
1780     memset(&surface_param, 0, sizeof(struct scaling_param));
1781     switch (hme_type) {
1782     case INTEL_ENC_HME_4x : {
1783         media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785         downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786         downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1787
1788         surface_param.input_surface = encode_state->input_yuv_object ;
1789         surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790         surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1791
1792         surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793         surface_param.output_frame_width = generic_state->frame_width_4x ;
1794         surface_param.output_frame_height = generic_state->frame_height_4x ;
1795
1796         surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797         surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798         surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1799
1800         surface_param.blk8x8_stat_enabled = 0 ;
1801         surface_param.use_4x_scaling  = 1 ;
1802         surface_param.use_16x_scaling = 0 ;
1803         surface_param.use_32x_scaling = 0 ;
1804         break;
1805     }
1806     case INTEL_ENC_HME_16x : {
1807         media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808         kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809         downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810         downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1811
1812         surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813         surface_param.input_frame_width = generic_state->frame_width_4x ;
1814         surface_param.input_frame_height = generic_state->frame_height_4x ;
1815
1816         surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817         surface_param.output_frame_width = generic_state->frame_width_16x ;
1818         surface_param.output_frame_height = generic_state->frame_height_16x ;
1819
1820         surface_param.enable_mb_flatness_check = 0 ;
1821         surface_param.enable_mb_variance_output = 0 ;
1822         surface_param.enable_mb_pixel_average_output = 0 ;
1823
1824         surface_param.blk8x8_stat_enabled = 0 ;
1825         surface_param.use_4x_scaling  = 0 ;
1826         surface_param.use_16x_scaling = 1 ;
1827         surface_param.use_32x_scaling = 0 ;
1828
1829         break;
1830     }
1831     case INTEL_ENC_HME_32x : {
1832         media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833         kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834         downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835         downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1836
1837         surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838         surface_param.input_frame_width = generic_state->frame_width_16x ;
1839         surface_param.input_frame_height = generic_state->frame_height_16x ;
1840
1841         surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842         surface_param.output_frame_width = generic_state->frame_width_32x ;
1843         surface_param.output_frame_height = generic_state->frame_height_32x ;
1844
1845         surface_param.enable_mb_flatness_check = 0 ;
1846         surface_param.enable_mb_variance_output = 0 ;
1847         surface_param.enable_mb_pixel_average_output = 0 ;
1848
1849         surface_param.blk8x8_stat_enabled = 0 ;
1850         surface_param.use_4x_scaling  = 0 ;
1851         surface_param.use_16x_scaling = 0 ;
1852         surface_param.use_32x_scaling = 1 ;
1853         break;
1854     }
1855     default :
1856         assert(0);
1857
1858     }
1859
1860     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1861
1862     gpe->context_init(ctx, gpe_context);
1863     gpe->reset_binding_table(ctx, gpe_context);
1864
1865     if (surface_param.use_32x_scaling) {
1866         generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1867     } else {
1868         generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1869     }
1870
1871     if (surface_param.use_32x_scaling) {
1872         surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873         surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1874     } else {
1875         surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876         surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1877     }
1878
1879     if (surface_param.use_4x_scaling) {
1880         if (avc_state->mb_status_supported) {
1881             surface_param.enable_mb_flatness_check = 0;
1882             surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883             surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1884
1885         } else {
1886             surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887             surface_param.mbv_proc_stat_enabled = 0 ;
1888             surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1889         }
1890     }
1891
1892     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1893
1894     /* setup the interface data */
1895     gpe->setup_interface_data(ctx, gpe_context);
1896
1897     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898     if (surface_param.use_32x_scaling) {
1899         kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900         kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1901     } else {
1902         /* the scaling is based on 8x8 blk level */
1903         kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904         kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1905     }
1906     kernel_walker_param.no_dependency = 1;
1907
1908     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1909
1910     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1911                                             gpe_context,
1912                                             media_function,
1913                                             &media_object_walker_param);
1914
1915     return VA_STATUS_SUCCESS;
1916 }
1917
1918 /*
1919 frame/mb brc related function
1920 */
/*
 * Initialize a Gen9 MFX_AVC_IMG_STATE command structure (`pstate`) with
 * per-frame PAK defaults for the legacy (non-VDEnc) encode path, derived
 * from the current sequence and picture parameter buffers.
 *
 * Pass-dependent fields (macroblock_stat_enable, non_first_pass_flag) are
 * set to first-pass values here and patched per pass by the callers
 * (gen9_avc_set_image_state / gen9_avc_set_image_state_non_brc).
 */
static void
gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen9_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; dword_length excludes the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.sub_opcode_b = 0;
    pstate->dw0.sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1/DW2: frame dimensions in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: picture-level coding controls from the picture params. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
    /* NOTE(review): chroma QP offsets are signed values assigned to
     * bit-fields; the implicit truncation is assumed to fit — confirm
     * against the bit-field widths in gen9_mfx_avc_img_state. */
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: frame structure and entropy-coding flags from SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: size-overrun monitoring and AQ (trellis) configuration. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization only applies with CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB bit-size caps used with the max-bit flags above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-pass slice delta-QP bounds (unused here, zeroed). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max effectively unbounded. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;

    pstate->dw12.vad_error_logic = 1;
    /* set paramters DW19/DW20 for slices */
}
2011
/*
 * Initialize a Gen8 MFX_AVC_IMG_STATE command structure (`pstate`) with
 * per-frame PAK defaults, derived from the current sequence and picture
 * parameter buffers.  Gen8 counterpart of gen9_avc_init_mfx_avc_img_state;
 * differs in the DW0 field names and in carrying inter/intra MB
 * conformance flags in DW3 instead of the BRC-domain enable.
 *
 * Pass-dependent fields are set to first-pass values here and patched per
 * pass by gen8_avc_set_image_state.
 */
static void
gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct gen8_mfx_avc_img_state *pstate)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;

    memset(pstate, 0, sizeof(*pstate));

    /* DW0: command header; dword_length excludes the two header dwords. */
    pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
    pstate->dw0.command_sub_opcode_b = 0;
    pstate->dw0.command_sub_opcode_a = 0;
    pstate->dw0.command_opcode = 1;
    pstate->dw0.command_pipeline = 2;
    pstate->dw0.command_type = 3;

    /* DW1/DW2: frame dimensions in macroblocks. */
    pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;

    pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
    pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;

    /* DW3: picture-level coding controls from the picture params. */
    pstate->dw3.image_structure = 0;//frame is zero
    pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
    pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    /* Conformance flags stay 0 for the first pass; set on re-encode
     * passes by gen8_avc_set_image_state. */
    pstate->dw3.inter_mb_conf_flag = 0;
    pstate->dw3.intra_mb_conf_flag = 0;
    /* NOTE(review): chroma QP offsets are signed values assigned to
     * bit-fields; the implicit truncation is assumed to fit — confirm
     * against the bit-field widths in gen8_mfx_avc_img_state. */
    pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
    pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;

    /* DW4: frame structure and entropy-coding flags from SPS/PPS. */
    pstate->dw4.field_picture_flag = 0;
    pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
    pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
    pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
    pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
    pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
    pstate->dw4.mb_mv_format_flag = 1;
    pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
    pstate->dw4.mv_unpacked_flag = 1;
    pstate->dw4.insert_test_flag = 0;
    pstate->dw4.load_slice_pointer_flag = 0;
    pstate->dw4.macroblock_stat_enable = 0;        /* disable in the first pass */
    pstate->dw4.minimum_frame_size = 0;
    /* DW5: size-overrun monitoring and AQ (trellis) configuration. */
    pstate->dw5.intra_mb_max_bit_flag = 1;
    pstate->dw5.inter_mb_max_bit_flag = 1;
    pstate->dw5.frame_size_over_flag = 1;
    pstate->dw5.frame_size_under_flag = 1;
    pstate->dw5.intra_mb_ipcm_flag = 1;
    pstate->dw5.mb_rate_ctrl_flag = 0;
    pstate->dw5.non_first_pass_flag = 0;
    pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
    pstate->dw5.aq_chroma_disable = 1;
    /* Trellis quantization only applies with CABAC entropy coding. */
    if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
        pstate->dw5.aq_enable = avc_state->tq_enable;
        pstate->dw5.aq_rounding = avc_state->tq_rounding;
    } else {
        pstate->dw5.aq_rounding = 0;
    }

    /* DW6: per-MB bit-size caps used with the max-bit flags above. */
    pstate->dw6.intra_mb_max_size = 2700;
    pstate->dw6.inter_mb_max_size = 4095;

    /* DW8/DW9: per-pass slice delta-QP bounds (unused here, zeroed). */
    pstate->dw8.slice_delta_qp_max0 = 0;
    pstate->dw8.slice_delta_qp_max1 = 0;
    pstate->dw8.slice_delta_qp_max2 = 0;
    pstate->dw8.slice_delta_qp_max3 = 0;

    pstate->dw9.slice_delta_qp_min0 = 0;
    pstate->dw9.slice_delta_qp_min1 = 0;
    pstate->dw9.slice_delta_qp_min2 = 0;
    pstate->dw9.slice_delta_qp_min3 = 0;

    /* DW10/DW11: frame bitrate window; max effectively unbounded. */
    pstate->dw10.frame_bitrate_min = 0;
    pstate->dw10.frame_bitrate_min_unit = 1;
    pstate->dw10.frame_bitrate_min_unit_mode = 1;
    pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
    pstate->dw10.frame_bitrate_max_unit = 1;
    pstate->dw10.frame_bitrate_max_unit_mode = 1;

    pstate->dw11.frame_bitrate_min_delta = 0;
    pstate->dw11.frame_bitrate_max_delta = 0;
    /* set paramters DW19/DW20 for slices */
}
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102                               struct encode_state *encode_state,
2103                               struct intel_encoder_context *encoder_context,
2104                               struct i965_gpe_resource *gpe_resource)
2105 {
2106     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2108     char *pdata;
2109     int i;
2110     unsigned int * data;
2111     struct gen9_mfx_avc_img_state cmd;
2112
2113     pdata = i965_map_gpe_resource(gpe_resource);
2114
2115     if (!pdata)
2116         return;
2117
2118     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119     for (i = 0; i < generic_state->num_pak_passes; i++) {
2120
2121         if (i == 0) {
2122             cmd.dw4.macroblock_stat_enable = 0;
2123             cmd.dw5.non_first_pass_flag = 0;
2124         } else {
2125             cmd.dw4.macroblock_stat_enable = 1;
2126             cmd.dw5.non_first_pass_flag = 1;
2127             cmd.dw5.intra_mb_ipcm_flag = 1;
2128
2129         }
2130         cmd.dw5.mb_rate_ctrl_flag = 0;
2131         memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132         data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133         *data = MI_BATCH_BUFFER_END;
2134
2135         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2136     }
2137     i965_unmap_gpe_resource(gpe_resource);
2138     return;
2139 }
2140
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142                               struct encode_state *encode_state,
2143                               struct intel_encoder_context *encoder_context,
2144                               struct i965_gpe_resource *gpe_resource)
2145 {
2146     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2148     char *pdata;
2149     int i;
2150     unsigned int * data;
2151     struct gen8_mfx_avc_img_state cmd;
2152
2153     pdata = i965_map_gpe_resource(gpe_resource);
2154
2155     if (!pdata)
2156         return;
2157
2158     gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159     for (i = 0; i < generic_state->num_pak_passes; i++) {
2160
2161         if (i == 0) {
2162             cmd.dw4.macroblock_stat_enable = 0;
2163             cmd.dw5.non_first_pass_flag = 0;
2164         } else {
2165             cmd.dw4.macroblock_stat_enable = 1;
2166             cmd.dw5.non_first_pass_flag = 1;
2167             cmd.dw5.intra_mb_ipcm_flag = 1;
2168             cmd.dw3.inter_mb_conf_flag = 1;
2169             cmd.dw3.intra_mb_conf_flag = 1;
2170         }
2171         cmd.dw5.mb_rate_ctrl_flag = 0;
2172         memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173         data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174         *data = MI_BATCH_BUFFER_END;
2175
2176         pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2177     }
2178     i965_unmap_gpe_resource(gpe_resource);
2179     return;
2180 }
2181
2182 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
2183                                       struct encode_state *encode_state,
2184                                       struct intel_encoder_context *encoder_context,
2185                                       struct i965_gpe_resource *gpe_resource)
2186 {
2187     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2188     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2189     char *pdata;
2190
2191     unsigned int * data;
2192     struct gen9_mfx_avc_img_state cmd;
2193
2194     pdata = i965_map_gpe_resource(gpe_resource);
2195
2196     if (!pdata)
2197         return;
2198
2199     gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2200
2201     if (generic_state->curr_pak_pass == 0) {
2202         cmd.dw4.macroblock_stat_enable = 0;
2203         cmd.dw5.non_first_pass_flag = 0;
2204
2205     } else {
2206         cmd.dw4.macroblock_stat_enable = 1;
2207         cmd.dw5.non_first_pass_flag = 0;
2208         cmd.dw5.intra_mb_ipcm_flag = 1;
2209     }
2210
2211     cmd.dw5.mb_rate_ctrl_flag = 0;
2212     memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2213     data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2214     *data = MI_BATCH_BUFFER_END;
2215
2216     i965_unmap_gpe_resource(gpe_resource);
2217     return;
2218 }
2219
/*
 * Build the Gen9.5 per-QP trellis-quantization lambda table in
 * avc_state->lamda_value_lut for the current frame type.
 *
 * Each 32-bit entry packs the intra lambda in the high 16 bits and the
 * inter lambda in the low 16 bits.  Entries carrying the sentinel values
 * (0xfffa intra, 0xffef inter) are replaced by 0xf000 plus a rounding
 * value picked from the per-preset defaults or the user overrides.
 */
static void
gen95_avc_calc_lambda_table(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    unsigned int value, inter, intra;
    unsigned int rounding_value = 0;
    unsigned int size = 0;
    int i = 0;
    int col = 0;
    unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);

    value = 0;
    inter = 0;
    intra = 0;

    /* Table is AVC_QP_MAX rows x 2 columns of 32-bit entries; `size` is
     * already in bytes.  NOTE(review): the "* sizeof(unsigned char)" in
     * the memcpy calls below multiplies by 1 and is redundant. */
    size = AVC_QP_MAX * 2 * sizeof(unsigned int);
    /* Seed the table from the static per-frame-type defaults. */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_P:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
        break;
    default:
        assert(0);
        break;
    }

    /* Patch the sentinel entries with the configured rounding values. */
    for (i = 0; i < AVC_QP_MAX ; i++) {
        for (col = 0; col < 2; col++) {
            value = *(lambda_table + i * 2 + col);
            /* High half: intra lambda. */
            intra = value >> 16;

            if (intra < GEN95_AVC_MAX_LAMBDA) {
                if (intra == 0xfffa) {
                    intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
                }
            }

            intra = intra << 16;
            /* Low half: inter lambda. */
            inter = value & 0xffff;

            if (inter < GEN95_AVC_MAX_LAMBDA) {
                if (inter == 0xffef) {
                    /* Choose the inter rounding: user override if set,
                     * otherwise the per-preset default; B frames further
                     * split on whether the picture is a reference. */
                    if (generic_state->frame_type == SLICE_TYPE_P) {
                        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
                            rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
                        else
                            rounding_value = avc_state->rounding_inter_p;
                    } else if (generic_state->frame_type == SLICE_TYPE_B) {
                        if (pic_param->pic_fields.bits.reference_pic_flag) {
                            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b_ref;
                        } else {
                            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
                                rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
                            else
                                rounding_value = avc_state->rounding_inter_b;
                        }
                    }
                }
                /* NOTE(review): this assignment sits outside the inner
                 * `inter == 0xffef` check, so EVERY inter value below
                 * GEN95_AVC_MAX_LAMBDA is replaced (possibly with a
                 * rounding_value carried over from a previous entry) —
                 * contrast with the intra branch above, where replacement
                 * is conditional on the sentinel.  Confirm intentional. */
                inter = 0xf000 + rounding_value;
            }
            *(lambda_table + i * 2 + col) = intra + inter;
        }
    }
}
2297
/*
 * gen9_avc_init_brc_const_data:
 * Populate the BRC constant-data surface (res_brc_const_data_buffer) with
 * the per-frame lookup tables consumed by the Gen9 BRC kernels.  Tables are
 * packed back-to-back in a fixed order:
 *   1. QP adjustment / distortion / max-frame threshold tables
 *   2. skip-value thresholds (P/B frames only)
 *   3. reference-list QP region
 *   4. MV cost + mode cost table (optionally patched for old-mode-cost/FTQ)
 *   5. ref cost table
 *   6. intra scaling factors (+ lambda and FTQ25 tables on KBL/GLK/Gen10)
 * `data` is a byte cursor into the mapped buffer; `size` is the byte length
 * of the section just written.
 */
static void
gen9_avc_init_brc_const_data(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned char * data = NULL;
    unsigned char * data_tmp = NULL;
    unsigned int size = 0;
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    int i = 0;

    struct object_surface *obj_surface;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VASurfaceID surface_id;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;

    gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
    assert(gpe_resource);

    /* Buffer is zeroed first so skipped sections stay all-zero. */
    i965_zero_gpe_resource(gpe_resource);

    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Select the per-slice-type table column (I/P/B). */
    table_idx = slice_type_kernel[generic_state->frame_type];

    /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
    size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
    memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));

    data += size;

    /* skip threshold table*/
    size = 128;
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P:
        memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    case SLICE_TYPE_B:
        memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
        break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }

    /* Application-supplied (non-FTQ) skip thresholds override every other
     * byte of the 2-byte-per-QP table. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
        }
    }
    data += size;

    /*fill the qp for ref list*/
    /* Layout: 32 bytes L0 | 32 bytes pad | 32 bytes L1 | 160 bytes reserved.
     * The used L0/L1 sub-regions are pre-filled with 0xff. */
    size = 32 + 32 + 32 + 160;
    memset(data, 0xff, 32);
    memset(data + 32 + 32, 0xff, 32);
    switch (generic_state->frame_type) {
    case SLICE_TYPE_P: {
        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//? NOTE(review): ref idx stored in the "QP" slot (original author's "?") — verify
        }
    }
    break;
    case SLICE_TYPE_B: {
        /* L1 entries first (at offset 64), then rewind for L0. */
        data = data + 32 + 32;
        for (i = 0 ; i <  slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList1[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[1][i];//? NOTE(review): see L0 note above — verify
        }

        data = data - 32 - 32;

        for (i = 0 ; i <  slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
            surface_id = slice_param->RefPicList0[i].picture_id;
            obj_surface = SURFACE(surface_id);
            if (!obj_surface)
                break;
            *(data + i) = avc_state->list_ref_idx[0][i];//? NOTE(review): see note above — verify
        }
    }
    break;
    default:
        /*SLICE_TYPE_I,no change */
        break;
    }
    data += size;

    /*mv cost and mode cost*/
    size = 1664;
    memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));

    /* Optionally patch one byte every 16 bytes with the legacy intra mode
     * cost (value truncated to a byte by the store). */
    if (avc_state->old_mode_cost_enable) {
        data_tmp = data;
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data_tmp += 16;
        }
    }

    /* Application-supplied FTQ skip thresholds fan out to 7 byte slots in
     * each 32-byte per-QP row. */
    if (avc_state->ftq_skip_threshold_lut_input_enable) {
        for (i = 0; i < AVC_QP_MAX ; i++) {
            *(data + (i * 32) + 24) =
                *(data + (i * 32) + 25) =
                    *(data + (i * 32) + 27) =
                        *(data + (i * 32) + 28) =
                            *(data + (i * 32) + 29) =
                                *(data + (i * 32) + 30) =
                                    *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
        }

    }
    data += size;

    /*ref cost*/
    size = 128;
    memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
    data += size;

    /*scaling factor*/
    size = 64;
    if (avc_state->adaptive_intra_scaling_enable) {
        memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
    } else {
        memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
    }

    /* Gen9.5-class platforms append lambda and FTQ25 tables. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        data += size;

        size = 512;
        memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
        data += size;

        size = 64;
        memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
    }

    i965_unmap_gpe_resource(gpe_resource);
}
2455
2456 static void
2457 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2458                                  struct encode_state *encode_state,
2459                                  struct intel_encoder_context *encoder_context)
2460 {
2461     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2462     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2463     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2464     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2465
2466     struct i965_gpe_resource *gpe_resource = NULL;
2467     unsigned int * data = NULL;
2468     unsigned int * data_tmp = NULL;
2469     unsigned int size = 0;
2470     unsigned int table_idx = 0;
2471     unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2472     unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2473     int i = 0;
2474
2475     gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2476     assert(gpe_resource);
2477
2478     i965_zero_gpe_resource(gpe_resource);
2479
2480     data = i965_map_gpe_resource(gpe_resource);
2481     assert(data);
2482
2483     table_idx = slice_type_kernel[generic_state->frame_type];
2484
2485     /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2486     size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2487     memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2488
2489     data += size;
2490
2491     /* skip threshold table*/
2492     size = 128;
2493     switch (generic_state->frame_type) {
2494     case SLICE_TYPE_P:
2495         memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2496         break;
2497     case SLICE_TYPE_B:
2498         memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2499         break;
2500     default:
2501         /*SLICE_TYPE_I,no change */
2502         break;
2503     }
2504
2505     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2506         for (i = 0; i < AVC_QP_MAX ; i++) {
2507             *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2508         }
2509     }
2510     data += size;
2511
2512     /*fill the qp for ref list*/
2513     size = 128;
2514     data += size;
2515     size = 128;
2516     data += size;
2517
2518     /*mv cost and mode cost*/
2519     size = 1664;
2520     memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
2521
2522     if (avc_state->old_mode_cost_enable) {
2523         data_tmp = data;
2524         for (i = 0; i < AVC_QP_MAX ; i++) {
2525             *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
2526             data_tmp += 16;
2527         }
2528     }
2529
2530     if (avc_state->ftq_skip_threshold_lut_input_enable) {
2531         for (i = 0; i < AVC_QP_MAX ; i++) {
2532             *(data + (i * 32) + 24) =
2533                 *(data + (i * 32) + 25) =
2534                     *(data + (i * 32) + 27) =
2535                         *(data + (i * 32) + 28) =
2536                             *(data + (i * 32) + 29) =
2537                                 *(data + (i * 32) + 30) =
2538                                     *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2539         }
2540
2541     }
2542     data += size;
2543
2544     /*ref cost*/
2545     size = 128;
2546     memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2547
2548     i965_unmap_gpe_resource(gpe_resource);
2549 }
2550 static void
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552                                   struct encode_state *encode_state,
2553                                   struct i965_gpe_context *gpe_context,
2554                                   struct intel_encoder_context *encoder_context,
2555                                   void * param)
2556 {
2557     gen9_avc_brc_init_reset_curbe_data *cmd;
2558     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561     double input_bits_per_frame = 0;
2562     double bps_ratio = 0;
2563     VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564     struct avc_param common_param;
2565
2566     cmd = i965_gpe_context_map_curbe(gpe_context);
2567
2568     if (!cmd)
2569         return;
2570
2571     memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2572
2573     memset(&common_param, 0, sizeof(common_param));
2574     common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575     common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576     common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577     common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578     common_param.frames_per_100s = generic_state->frames_per_100s;
2579     common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580     common_param.target_bit_rate = generic_state->target_bit_rate;
2581
2582     cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583     cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584     cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
2585     cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586     cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2587     cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588     cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589     cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590     cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591     cmd->dw12.no_slices = avc_state->slice_num;
2592
2593     //VUI
2594     if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595         cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate;
2596         if (generic_state->internal_rate_mode == VA_RC_CBR) {
2597             cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2598
2599         }
2600
2601     }
2602     cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2603     cmd->dw7.frame_rate_d = 100;
2604     cmd->dw8.brc_flag = 0;
2605     cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2606
2607
2608     if (generic_state->internal_rate_mode == VA_RC_CBR) {
2609         //CBR
2610         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2611         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2612
2613     } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2614         //VBR
2615         if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2616             cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2617         }
2618         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2619
2620     } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2621         //AVBR
2622         cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2623         cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2624
2625     }
2626     //igonre icq/vcm/qvbr
2627
2628     cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2629     cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
2630
2631     //frame bits
2632     input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
2633
2634     if (cmd->dw2.buf_size_in_bits == 0) {
2635         cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2636     }
2637
2638     if (cmd->dw1.init_buf_full_in_bits == 0) {
2639         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2640     }
2641     if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2642         cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2643     }
2644     if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2645         cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
2646     }
2647
2648     //AVBR
2649     if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2650         cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2651         cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
2652
2653     }
2654
2655     bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2656     bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
2657
2658
2659     cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2660     cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2661     cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2662     cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2663     cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 *  pow(0.3, bps_ratio));
2664     cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2665     cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7,  bps_ratio));
2666     cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9,  bps_ratio));
2667     cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2668     cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2669     cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2670     cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2671     cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2672     cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2673     cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2674     cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2675     cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2676     cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2677     cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2678     cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2679     cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2,  bps_ratio));
2680     cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4,  bps_ratio));
2681     cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2682     cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9,  bps_ratio));
2683
2684     cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2685
2686     i965_gpe_context_unmap_curbe(gpe_context);
2687
2688     return;
2689 }
2690
2691 static void
2692 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2693                                      struct encode_state *encode_state,
2694                                      struct i965_gpe_context *gpe_context,
2695                                      struct intel_encoder_context *encoder_context,
2696                                      void * param_mbenc)
2697 {
2698     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2699     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2700
2701     i965_add_buffer_gpe_surface(ctx,
2702                                 gpe_context,
2703                                 &avc_ctx->res_brc_history_buffer,
2704                                 0,
2705                                 avc_ctx->res_brc_history_buffer.size,
2706                                 0,
2707                                 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
2708
2709     i965_add_buffer_2d_gpe_surface(ctx,
2710                                    gpe_context,
2711                                    &avc_ctx->res_brc_dist_data_surface,
2712                                    1,
2713                                    I965_SURFACEFORMAT_R8_UNORM,
2714                                    GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
2715
2716     return;
2717 }
2718
2719 static VAStatus
2720 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2721                                struct encode_state *encode_state,
2722                                struct intel_encoder_context *encoder_context)
2723 {
2724     struct i965_driver_data *i965 = i965_driver_data(ctx);
2725     struct i965_gpe_table *gpe = &i965->gpe_table;
2726     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2727     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2728     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2729     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2730
2731     struct i965_gpe_context *gpe_context;
2732     struct gpe_media_object_parameter media_object_param;
2733     struct gpe_media_object_inline_data media_object_inline_data;
2734     int media_function = 0;
2735     int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2736
2737     media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
2738
2739     if (generic_state->brc_inited)
2740         kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2741
2742     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2743
2744     gpe->context_init(ctx, gpe_context);
2745     gpe->reset_binding_table(ctx, gpe_context);
2746
2747     generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2748
2749     generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2750
2751     gpe->setup_interface_data(ctx, gpe_context);
2752
2753     memset(&media_object_param, 0, sizeof(media_object_param));
2754     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2755     media_object_param.pinline_data = &media_object_inline_data;
2756     media_object_param.inline_size = sizeof(media_object_inline_data);
2757
2758     gen9_avc_run_kernel_media_object(ctx, encoder_context,
2759                                      gpe_context,
2760                                      media_function,
2761                                      &media_object_param);
2762
2763     return VA_STATUS_SUCCESS;
2764 }
2765
/*
 * gen9_avc_set_curbe_brc_frame_update:
 * Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness (with overflow wrap), skip-frame accounting, frame type and
 * flags, min/max QP clamps, AVBR growth/ratio thresholds, ROI enable and
 * the profile/level max frame size.  Also advances the software model of
 * the VBV fullness in generic_state (side effect).
 */
static void
gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen9_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;
    VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;

    obj_surface = encode_state->reconstructed_object;

    /* Need the reconstructed surface's private AVC data; bail out quietly
     * if it is missing. */
    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    /* Start from the default CURBE template, then overwrite fields. */
    memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));

    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow: wrap the modeled fullness back into the buffer range */
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        /* Skipped frames still consume their nominal bit budget. */
        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ; /* bytes -> bits */
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-frame-type QP clamps; zero means "no clamp".
     * NOTE(review): no default case — frame_type is expected to be I/P/B. */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }
    cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
    cmd->dw6.enable_sliding_window = 0 ;

    /* Advance the modeled fullness by one frame's nominal budget. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR: growth-adjust frame points and rate-ratio thresholds scaled by
     * the configured convergence/accuracy. */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }
    cmd->dw15.enable_roi = generic_state->brc_roi_enable ;

    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
2875
2876 static void
2877 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2878                                        struct encode_state *encode_state,
2879                                        struct i965_gpe_context *gpe_context,
2880                                        struct intel_encoder_context *encoder_context,
2881                                        void * param_brc)
2882 {
2883     struct i965_driver_data *i965 = i965_driver_data(ctx);
2884     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2885     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2886     struct brc_param * param = (struct brc_param *)param_brc ;
2887     struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2888     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2889     unsigned char is_g95 = 0;
2890
2891     if (IS_SKL(i965->intel.device_info) ||
2892         IS_BXT(i965->intel.device_info) ||
2893         IS_GEN8(i965->intel.device_info))
2894         is_g95 = 0;
2895     else if (IS_KBL(i965->intel.device_info) ||
2896              IS_GEN10(i965->intel.device_info) ||
2897              IS_GLK(i965->intel.device_info))
2898         is_g95 = 1;
2899
2900     /* brc history buffer*/
2901     i965_add_buffer_gpe_surface(ctx,
2902                                 gpe_context,
2903                                 &avc_ctx->res_brc_history_buffer,
2904                                 0,
2905                                 avc_ctx->res_brc_history_buffer.size,
2906                                 0,
2907                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2908
2909     /* previous pak buffer*/
2910     i965_add_buffer_gpe_surface(ctx,
2911                                 gpe_context,
2912                                 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2913                                 0,
2914                                 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2915                                 0,
2916                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2917
2918     /* image state command buffer read only*/
2919     i965_add_buffer_gpe_surface(ctx,
2920                                 gpe_context,
2921                                 &avc_ctx->res_brc_image_state_read_buffer,
2922                                 0,
2923                                 avc_ctx->res_brc_image_state_read_buffer.size,
2924                                 0,
2925                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2926
2927     /* image state command buffer write only*/
2928     i965_add_buffer_gpe_surface(ctx,
2929                                 gpe_context,
2930                                 &avc_ctx->res_brc_image_state_write_buffer,
2931                                 0,
2932                                 avc_ctx->res_brc_image_state_write_buffer.size,
2933                                 0,
2934                                 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
2935
2936     if (avc_state->mbenc_brc_buffer_size > 0) {
2937         i965_add_buffer_gpe_surface(ctx,
2938                                     gpe_context,
2939                                     &(avc_ctx->res_mbenc_brc_buffer),
2940                                     0,
2941                                     avc_ctx->res_mbenc_brc_buffer.size,
2942                                     0,
2943                                     GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2944     } else {
2945         /*  Mbenc curbe input buffer */
2946         gen9_add_dri_buffer_gpe_surface(ctx,
2947                                         gpe_context,
2948                                         gpe_context_mbenc->dynamic_state.bo,
2949                                         0,
2950                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2951                                         gpe_context_mbenc->curbe.offset,
2952                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2953         /* Mbenc curbe output buffer */
2954         gen9_add_dri_buffer_gpe_surface(ctx,
2955                                         gpe_context,
2956                                         gpe_context_mbenc->dynamic_state.bo,
2957                                         0,
2958                                         ALIGN(gpe_context_mbenc->curbe.length, 64),
2959                                         gpe_context_mbenc->curbe.offset,
2960                                         GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2961     }
2962
2963     /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2964     i965_add_buffer_2d_gpe_surface(ctx,
2965                                    gpe_context,
2966                                    &avc_ctx->res_brc_dist_data_surface,
2967                                    1,
2968                                    I965_SURFACEFORMAT_R8_UNORM,
2969                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2970
2971     /* BRC const data 2D surface buffer */
2972     i965_add_buffer_2d_gpe_surface(ctx,
2973                                    gpe_context,
2974                                    &avc_ctx->res_brc_const_data_buffer,
2975                                    1,
2976                                    I965_SURFACEFORMAT_R8_UNORM,
2977                                    (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2978
2979     /* MB statistical data surface*/
2980     if (!IS_GEN8(i965->intel.device_info)) {
2981         i965_add_buffer_gpe_surface(ctx,
2982                                     gpe_context,
2983                                     &avc_ctx->res_mb_status_buffer,
2984                                     0,
2985                                     avc_ctx->res_mb_status_buffer.size,
2986                                     0,
2987                                     (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
2988     } else {
2989         i965_add_buffer_2d_gpe_surface(ctx,
2990                                        gpe_context,
2991                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
2992                                        1,
2993                                        I965_SURFACEFORMAT_R8_UNORM,
2994                                        GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
2995     }
2996     return;
2997 }
2998
/*
 * Dispatch the frame-level BRC (bit-rate control) update kernel.
 *
 * The kernel reads the BRC history / previous PAK statistics surfaces and
 * produces an updated MFX image state and an updated MBEnc curbe (both
 * bound by pfn_send_brc_frame_update_surface), so subsequent MBEnc/PAK
 * passes run with BRC-adjusted parameters.  The kernel is launched as a
 * single media object (one GPU thread).
 *
 * Always returns VA_STATUS_SUCCESS; submission errors are not reported
 * back from this path.
 */
static VAStatus
gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context = NULL;
    struct gpe_media_object_parameter media_object_param;
    struct gpe_media_object_inline_data media_object_inline_data;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
    /* The MBEnc curbe is built here with BRC reported as off (0); it is
     * passed as-is into curbe_mbenc_param below. */
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* NOTE: the trailing "(0)" deliberately forces dirty-ROI off. */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));

    /* the following set the mbenc curbe*/
    struct mbenc_param curbe_mbenc_param ;
    struct brc_param curbe_brc_param ;

    /* The MB constant-data / MB-QP surfaces are needed whenever any
     * per-MB feature (MB BRC, ROI, per-MB QP, rolling intra refresh) is on. */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    /* Select the MBEnc kernel variant for the configured quality mode;
     * +1 / +2 below pick the P / B flavour of that variant. */
    switch (generic_state->kernel_mode) {
    case INTEL_ENC_KERNEL_NORMAL : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
        break;
    }
    case INTEL_ENC_KERNEL_PERFORMANCE : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
        break;
    }
    case INTEL_ENC_KERNEL_QUALITY : {
        kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
        break;
    }
    default:
        assert(0);

    }

    if (generic_state->frame_type == SLICE_TYPE_P) {
        kernel_idx += 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        kernel_idx += 2;
    }

    gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    gpe->context_init(ctx, gpe_context);

    memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));

    curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
    curbe_mbenc_param.brc_enabled = brc_enabled;
    curbe_mbenc_param.roi_enabled = roi_enable;

    /* Build the MBEnc curbe first: the BRC kernel reads/patches it. */
    generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);

    /* gen95 sets the MBEnc curbe outside of BRC; gen9 does it here. */
    avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
    /* Begin the BRC frame update proper: switch gpe_context from the
     * MBEnc kernel to the BRC frame-update kernel. */
    memset(&curbe_brc_param, 0, sizeof(struct brc_param));
    curbe_brc_param.gpe_context_mbenc = gpe_context;
    media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
    kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
    gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    curbe_brc_param.gpe_context_brc_frame_update = gpe_context;

    gpe->context_init(ctx, gpe_context);
    gpe->reset_binding_table(ctx, gpe_context);
    /* BRC copy step is intentionally skipped here. */

    /* Program the BRC frame-update curbe. */
    generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);

    /* Load the BRC constant data (distinct from the MBEnc MB-BRC table). */
    if (avc_state->multi_pre_enable) {
        gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
    } else {
        gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
    }
    /* Construct the MFX image-state commands the kernel will patch. */
    if (IS_GEN8(i965->intel.device_info)) {
        gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    } else {
        gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
    }
    /* Bind all surfaces for the BRC frame-update kernel. */
    generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);


    gpe->setup_interface_data(ctx, gpe_context);

    /* Single-thread dispatch with empty inline data. */
    memset(&media_object_param, 0, sizeof(media_object_param));
    memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
    media_object_param.pinline_data = &media_object_inline_data;
    media_object_param.inline_size = sizeof(media_object_inline_data);

    gen9_avc_run_kernel_media_object(ctx, encoder_context,
                                     gpe_context,
                                     media_function,
                                     &media_object_param);

    return VA_STATUS_SUCCESS;
}
3123
3124 static void
3125 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3126                                  struct encode_state *encode_state,
3127                                  struct i965_gpe_context *gpe_context,
3128                                  struct intel_encoder_context *encoder_context,
3129                                  void * param)
3130 {
3131     gen9_avc_mb_brc_curbe_data *cmd;
3132     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3133     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3134
3135     cmd = i965_gpe_context_map_curbe(gpe_context);
3136
3137     if (!cmd)
3138         return;
3139
3140     memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
3141
3142     cmd->dw0.cur_frame_type = generic_state->frame_type;
3143     if (generic_state->brc_roi_enable) {
3144         cmd->dw0.enable_roi = 1;
3145     } else {
3146         cmd->dw0.enable_roi = 0;
3147     }
3148
3149     i965_gpe_context_unmap_curbe(gpe_context);
3150
3151     return;
3152 }
3153
3154 static void
3155 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3156                                     struct encode_state *encode_state,
3157                                     struct i965_gpe_context *gpe_context,
3158                                     struct intel_encoder_context *encoder_context,
3159                                     void * param_mbenc)
3160 {
3161     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3162     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3163     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3164
3165     /* brc history buffer*/
3166     i965_add_buffer_gpe_surface(ctx,
3167                                 gpe_context,
3168                                 &avc_ctx->res_brc_history_buffer,
3169                                 0,
3170                                 avc_ctx->res_brc_history_buffer.size,
3171                                 0,
3172                                 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3173
3174     /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
3175     if (generic_state->mb_brc_enabled) {
3176         i965_add_buffer_2d_gpe_surface(ctx,
3177                                        gpe_context,
3178                                        &avc_ctx->res_mbbrc_mb_qp_data_surface,
3179                                        1,
3180                                        I965_SURFACEFORMAT_R8_UNORM,
3181                                        GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3182
3183     }
3184
3185     /* BRC roi feature*/
3186     if (generic_state->brc_roi_enable) {
3187         i965_add_buffer_gpe_surface(ctx,
3188                                     gpe_context,
3189                                     &avc_ctx->res_mbbrc_roi_surface,
3190                                     0,
3191                                     avc_ctx->res_mbbrc_roi_surface.size,
3192                                     0,
3193                                     GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3194
3195     }
3196
3197     /* MB statistical data surface*/
3198     i965_add_buffer_gpe_surface(ctx,
3199                                 gpe_context,
3200                                 &avc_ctx->res_mb_status_buffer,
3201                                 0,
3202                                 avc_ctx->res_mb_status_buffer.size,
3203                                 0,
3204                                 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
3205
3206     return;
3207 }
3208
3209 static VAStatus
3210 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3211                               struct encode_state *encode_state,
3212                               struct intel_encoder_context *encoder_context)
3213
3214 {
3215     struct i965_driver_data *i965 = i965_driver_data(ctx);
3216     struct i965_gpe_table *gpe = &i965->gpe_table;
3217     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3218     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3219     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3220     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3221
3222     struct i965_gpe_context *gpe_context;
3223     struct gpe_media_object_walker_parameter media_object_walker_param;
3224     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3225     int media_function = 0;
3226     int kernel_idx = 0;
3227
3228     media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3229     kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3230     gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3231
3232     gpe->context_init(ctx, gpe_context);
3233     gpe->reset_binding_table(ctx, gpe_context);
3234
3235     /* set curbe brc mb update*/
3236     generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3237
3238
3239     /* set surface brc mb update*/
3240     generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3241
3242
3243     gpe->setup_interface_data(ctx, gpe_context);
3244
3245     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3246     /* the scaling is based on 8x8 blk level */
3247     kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3248     kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3249     kernel_walker_param.no_dependency = 1;
3250
3251     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3252
3253     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3254                                             gpe_context,
3255                                             media_function,
3256                                             &media_object_walker_param);
3257
3258     return VA_STATUS_SUCCESS;
3259 }
3260
3261 /*
3262 mbenc kernel related function,it include intra dist kernel
3263 */
3264 static int
3265 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3266 {
3267     int biweight = 32;      // default value
3268
3269     /* based on kernel HLD*/
3270     if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3271         biweight = 32;
3272     } else {
3273         biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3274
3275         if (biweight != 16 && biweight != 21 &&
3276             biweight != 32 && biweight != 43 && biweight != 48) {
3277             biweight = 32;        // If # of B-pics between two refs is more than 3. VME does not support it.
3278         }
3279     }
3280
3281     return biweight;
3282 }
3283
3284 static void
3285 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3286                                struct encode_state *encode_state,
3287                                struct intel_encoder_context *encoder_context)
3288 {
3289     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3290     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3291     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3292     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3293
3294     int max_num_references;
3295     VAPictureH264 *curr_pic;
3296     VAPictureH264 *ref_pic_l0;
3297     VAPictureH264 *ref_pic_l1;
3298     int i = 0;
3299     int tb = 0;
3300     int td = 0;
3301     int tx = 0;
3302     int tmp = 0;
3303     int poc0 = 0;
3304     int poc1 = 0;
3305
3306     max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3307
3308     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3309     curr_pic = &pic_param->CurrPic;
3310     for (i = 0; i < max_num_references; i++) {
3311         ref_pic_l0 = &(slice_param->RefPicList0[i]);
3312
3313         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3314             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3315             break;
3316         ref_pic_l1 = &(slice_param->RefPicList1[0]);
3317         if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3318             (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
3319             break;
3320
3321         poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322         poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3323         CLIP(poc0, -128, 127);
3324         CLIP(poc1, -128, 127);
3325         tb = poc0;
3326         td = poc1;
3327
3328         if (td == 0) {
3329             td = 1;
3330         }
3331         tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3332         tx = (16384 + tmp) / td ;
3333         tmp = (tb * tx + 32) >> 6;
3334         CLIP(tmp, -1024, 1023);
3335         avc_state->dist_scale_factor_list0[i] = tmp;
3336     }
3337     return;
3338 }
3339
3340 static unsigned int
3341 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3342                               VAEncSliceParameterBufferH264 *slice_param,
3343                               int list,
3344                               int ref_frame_idx)
3345 {
3346     struct i965_driver_data *i965 = i965_driver_data(ctx);
3347     struct object_surface *obj_surface;
3348     struct gen9_surface_avc *avc_priv_surface;
3349     VASurfaceID surface_id;
3350
3351     assert(slice_param);
3352     assert(list < 2);
3353
3354     if (list == 0) {
3355         if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3356             surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3357         else
3358             return 0;
3359     } else {
3360         if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3361             surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
3362         else
3363             return 0;
3364     }
3365     obj_surface = SURFACE(surface_id);
3366     if (obj_surface && obj_surface->private_data) {
3367         avc_priv_surface = obj_surface->private_data;
3368         return avc_priv_surface->qp_value;
3369     } else {
3370         return 0;
3371     }
3372 }
3373
/*
 * Fill the MB-BRC constant-data surface (res_mbbrc_const_data_buffer).
 *
 * The surface is a table of AVC_QP_MAX (52) rows, one per QP, each 16
 * dwords wide (size = 16 * 52 dwords).  A slice-type preset table is
 * copied in first, then individual dwords of every row are patched
 * according to the enabled features:
 *   dword  3     : P-skip MV cost (skip-bias adjustment, P slices only)
 *   dwords 6, 7  : FTQ skip-threshold LUT (byte-replicated)
 *   dword  9     : non-FTQ skip value
 *   dword 10     : intra scaling factor (adaptive or fixed)
 *   dwords 11, 12: trellis lambda values
 */
static void
gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_resource *gpe_resource = NULL;
    unsigned int * data = NULL;
    unsigned int * data_tmp = NULL;
    unsigned int size = 16 * 52;      /* 16 dwords per QP x 52 QPs */
    unsigned int table_idx = 0;
    unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
    unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
    int i = 0;

    gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
    assert(gpe_resource);
    data = i965_map_gpe_resource(gpe_resource);
    assert(data);

    /* Start from the preset table for the current slice type. */
    table_idx = slice_type_kernel[generic_state->frame_type];

    memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));

    /* Remember the table base; each pass below walks it in 16-dword rows. */
    data_tmp = data;

    /* First pass: slice-type-specific patches, one row per QP. */
    switch (generic_state->frame_type) {
    case SLICE_TYPE_I:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 0: legacy intra mode cost, when enabled */
            if (avc_state->old_mode_cost_enable)
                *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
            data += 16;
        }
        break;
    case SLICE_TYPE_P:
    case SLICE_TYPE_B:
        for (i = 0; i < AVC_QP_MAX ; i++) {
            /* dword 3: P-skip MV cost adjustment */
            if (generic_state->frame_type == SLICE_TYPE_P) {
                if (avc_state->skip_bias_adjustment_enable)
                    *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
            }
            /* dword 9: non-FTQ skip value — caller LUT takes priority,
             * otherwise the per-slice-type preset table. */
            if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
                *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
            } else if (generic_state->frame_type == SLICE_TYPE_P) {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
            } else {
                *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
            }

            /* dword 10: intra scaling factor (adaptive variant optional) */
            if (avc_state->adaptive_intra_scaling_enable) {
                *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
            } else {
                *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];

            }
            data += 16;

        }
        break;
    default:
        assert(0);
    }

    /* Second pass (all slice types): FTQ thresholds and trellis lambdas. */
    data = data_tmp;
    for (i = 0; i < AVC_QP_MAX ; i++) {
        if (avc_state->ftq_skip_threshold_lut_input_enable) {
            /* dwords 6/7: threshold byte replicated across lanes */
            *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
            *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
                           (avc_state->ftq_skip_threshold_lut[i] << 8) |
                           (avc_state->ftq_skip_threshold_lut[i] << 16) |
                           (avc_state->ftq_skip_threshold_lut[i] << 24));
        }

        if (avc_state->kernel_trellis_enable) {
            /* dwords 11/12: per-QP lambda values for kernel trellis */
            *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
            *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];

        }
        data += 16;

    }
    i965_unmap_gpe_resource(gpe_resource);
}
3463
3464 static void
3465 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3466                          struct encode_state *encode_state,
3467                          struct i965_gpe_context *gpe_context,
3468                          struct intel_encoder_context *encoder_context,
3469                          void * param)
3470 {
3471     struct i965_driver_data *i965 = i965_driver_data(ctx);
3472     union {
3473         gen9_avc_mbenc_curbe_data *g9;
3474         gen95_avc_mbenc_curbe_data *g95;
3475     } cmd;
3476     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3477     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3478     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3479
3480     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3481     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
3482     VASurfaceID surface_id;
3483     struct object_surface *obj_surface;
3484
3485     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3486     unsigned char qp = 0;
3487     unsigned char me_method = 0;
3488     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3489     unsigned int table_idx = 0;
3490     unsigned char is_g9 = 0;
3491     unsigned char is_g95 = 0;
3492     unsigned int curbe_size = 0;
3493
3494     unsigned int preset = generic_state->preset;
3495     if (IS_SKL(i965->intel.device_info) ||
3496         IS_BXT(i965->intel.device_info)) {
3497         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3498         if (!cmd.g9)
3499             return;
3500         is_g9 = 1;
3501         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3502         memset(cmd.g9, 0, curbe_size);
3503
3504         if (mbenc_i_frame_dist_in_use) {
3505             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3506
3507         } else {
3508             switch (generic_state->frame_type) {
3509             case SLICE_TYPE_I:
3510                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3511                 break;
3512             case SLICE_TYPE_P:
3513                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3514                 break;
3515             case SLICE_TYPE_B:
3516                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3517                 break;
3518             default:
3519                 assert(0);
3520             }
3521
3522         }
3523     } else if (IS_KBL(i965->intel.device_info) ||
3524                IS_GEN10(i965->intel.device_info) ||
3525                IS_GLK(i965->intel.device_info)) {
3526         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3527         if (!cmd.g95)
3528             return;
3529         is_g95 = 1;
3530         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3531         memset(cmd.g9, 0, curbe_size);
3532
3533         if (mbenc_i_frame_dist_in_use) {
3534             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3535
3536         } else {
3537             switch (generic_state->frame_type) {
3538             case SLICE_TYPE_I:
3539                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3540                 break;
3541             case SLICE_TYPE_P:
3542                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3543                 break;
3544             case SLICE_TYPE_B:
3545                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3546                 break;
3547             default:
3548                 assert(0);
3549             }
3550
3551         }
3552     } else {
3553         /* Never get here, just silence a gcc warning */
3554         assert(0);
3555
3556         return;
3557     }
3558
3559     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3560     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3561
3562     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3564     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3565     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3566
3567     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3568     cmd.g9->dw38.max_len_sp = 0;
3569
3570     if (is_g95)
3571         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3572
3573     cmd.g9->dw3.src_access = 0;
3574     cmd.g9->dw3.ref_access = 0;
3575
3576     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3577         //disable ftq_override by now.
3578         if (avc_state->ftq_override) {
3579             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3580
3581         } else {
3582             // both gen9 and gen95 come here by now
3583             if (generic_state->frame_type == SLICE_TYPE_P) {
3584                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3585
3586             } else {
3587                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3588             }
3589         }
3590     } else {
3591         cmd.g9->dw3.ftq_enable = 0;
3592     }
3593
3594     if (avc_state->disable_sub_mb_partion)
3595         cmd.g9->dw3.sub_mb_part_mask = 0x7;
3596
3597     if (mbenc_i_frame_dist_in_use) {
3598         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3599         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3600         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3601         cmd.g9->dw6.batch_buffer_end = 0;
3602         cmd.g9->dw31.intra_compute_type = 1;
3603
3604     } else {
3605         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3606         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3607         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3608
3609         {
3610             memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3611             if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3612             } else if (avc_state->skip_bias_adjustment_enable) {
3613                 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3614                 // No need to check for P picture as the flag is only enabled for P picture */
3615                 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3616
3617             }
3618         }
3619
3620         table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3621         memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3622     }
3623     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3624     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3625     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3626     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3627     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3628     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3629     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3630     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3631
3632
3633     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3634     cmd.g9->dw7.src_field_polarity = 0;//field related
3635
3636     /*ftq_skip_threshold_lut set,dw14 /15*/
3637
3638     /*r5 disable NonFTQSkipThresholdLUT*/
3639     if (generic_state->frame_type == SLICE_TYPE_P) {
3640         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3641
3642     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3643         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3644
3645     }
3646
3647     cmd.g9->dw13.qp_prime_y = qp;
3648     cmd.g9->dw13.qp_prime_cb = qp;
3649     cmd.g9->dw13.qp_prime_cr = qp;
3650     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3651
3652     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3653         switch (gen9_avc_multi_pred[preset]) {
3654         case 0:
3655             cmd.g9->dw32.mult_pred_l0_disable = 128;
3656             cmd.g9->dw32.mult_pred_l1_disable = 128;
3657             break;
3658         case 1:
3659             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3660             cmd.g9->dw32.mult_pred_l1_disable = 128;
3661             break;
3662         case 2:
3663             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3664             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3665             break;
3666         case 3:
3667             cmd.g9->dw32.mult_pred_l0_disable = 1;
3668             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3669             break;
3670
3671         }
3672
3673     } else {
3674         cmd.g9->dw32.mult_pred_l0_disable = 128;
3675         cmd.g9->dw32.mult_pred_l1_disable = 128;
3676     }
3677
3678     /*field setting for dw33 34, ignored*/
3679
3680     if (avc_state->adaptive_transform_decision_enable) {
3681         if (generic_state->frame_type != SLICE_TYPE_I) {
3682             if (is_g9) {
3683                 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3684                 cmd.g9->dw58.mb_texture_threshold = 1024;
3685                 cmd.g9->dw58.tx_decision_threshold = 128;
3686             } else if (is_g95) {
3687                 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3688                 cmd.g95->dw60.mb_texture_threshold = 1024;
3689                 cmd.g95->dw60.tx_decision_threshold = 128;
3690             }
3691         }
3692     }
3693
3694
3695     if (generic_state->frame_type == SLICE_TYPE_B) {
3696         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3697         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3698         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3699     }
3700
3701     cmd.g9->dw34.b_original_bff = 0; //frame only
3702     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3703     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3704     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3705     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3706     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3707     if (is_g95) {
3708         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3709         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3710     }
3711
3712     if (is_g9) {
3713         cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3714
3715         if (cmd.g9->dw34.force_non_skip_check) {
3716             cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3717         }
3718     }
3719
3720
3721     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3722     cmd.g9->dw38.ref_threshold = 400;
3723     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3724
3725     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
3726        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3727        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3728     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3729
3730     if (mbenc_i_frame_dist_in_use) {
3731         cmd.g9->dw13.qp_prime_y = 0;
3732         cmd.g9->dw13.qp_prime_cb = 0;
3733         cmd.g9->dw13.qp_prime_cr = 0;
3734         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3735         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3736         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3737
3738     }
3739     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3740         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3741         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3742         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3743         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3744         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3745         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3746         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3747         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3748         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3749         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3750     }
3751
3752     table_idx = slice_type_kernel[generic_state->frame_type];
3753     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3754
3755     if (generic_state->frame_type == SLICE_TYPE_I) {
3756         cmd.g9->dw0.skip_mode_enable = 0;
3757         cmd.g9->dw37.skip_mode_enable = 0;
3758         cmd.g9->dw36.hme_combine_overlap = 0;
3759         cmd.g9->dw47.intra_cost_sf = 16;
3760         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3761         if (is_g9)
3762             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3763
3764     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3765         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3766         cmd.g9->dw3.bme_disable_fbr = 1;
3767         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3768         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3769         cmd.g9->dw7.non_skip_zmv_added = 1;
3770         cmd.g9->dw7.non_skip_mode_added = 1;
3771         cmd.g9->dw7.skip_center_mask = 1;
3772         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3773         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3774         cmd.g9->dw36.hme_combine_overlap = 1;
3775         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3776         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3777         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3778         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3779         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3780         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3781             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3782
3783     } else {
3784         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3785         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3786         cmd.g9->dw3.search_ctrl = 7;
3787         cmd.g9->dw3.skip_type = 1;
3788         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3789         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3790         cmd.g9->dw7.skip_center_mask = 0xff;
3791         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3792         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3793         cmd.g9->dw36.hme_combine_overlap = 1;
3794         surface_id = slice_param->RefPicList1[0].picture_id;
3795         obj_surface = SURFACE(surface_id);
3796         if (!obj_surface) {
3797             WARN_ONCE("Invalid backward reference frame\n");
3798             return;
3799         }
3800         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3801
3802         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3803         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3804         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3805         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3806         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3807         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3808         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3809         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3810         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3811         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3812         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3813         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3814
3815         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3816         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3817             cmd.g9->dw7.non_skip_zmv_added = 1;
3818             cmd.g9->dw7.non_skip_mode_added = 1;
3819         }
3820
3821         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3822         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3823             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3824
3825     }
3826
3827     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3828
3829     if (avc_state->rolling_intra_refresh_enable) {
3830         /*by now disable it*/
3831         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3832         cmd.g9->dw32.mult_pred_l0_disable = 128;
3833         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3834          across one P frame to another P frame, as needed by the RollingI algo */
3835         if (is_g9) {
3836             cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3837             cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3838             cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3839         }
3840
3841         if (is_g95) {
3842             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3843                 cmd.g95->dw4.enable_intra_refresh = 0;
3844                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3845                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3846                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3847             } else {
3848                 cmd.g95->dw4.enable_intra_refresh = 1;
3849                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3850                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3851                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3852                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3853                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3854             }
3855         }
3856
3857     } else {
3858         cmd.g9->dw34.widi_intra_refresh_en = 0;
3859     }
3860
3861     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3862     if (is_g9)
3863         cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3864     else if (is_g95)
3865         cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3866
3867     /*roi set disable by now. 49-56*/
3868     if (curbe_param->roi_enabled) {
3869         cmd.g9->dw49.roi_1_x_left   = generic_state->roi[0].left;
3870         cmd.g9->dw49.roi_1_y_top    = generic_state->roi[0].top;
3871         cmd.g9->dw50.roi_1_x_right  = generic_state->roi[0].right;
3872         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3873
3874         cmd.g9->dw51.roi_2_x_left   = generic_state->roi[1].left;
3875         cmd.g9->dw51.roi_2_y_top    = generic_state->roi[1].top;
3876         cmd.g9->dw52.roi_2_x_right  = generic_state->roi[1].right;
3877         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3878
3879         cmd.g9->dw53.roi_3_x_left   = generic_state->roi[2].left;
3880         cmd.g9->dw53.roi_3_y_top    = generic_state->roi[2].top;
3881         cmd.g9->dw54.roi_3_x_right  = generic_state->roi[2].right;
3882         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3883
3884         cmd.g9->dw55.roi_4_x_left   = generic_state->roi[3].left;
3885         cmd.g9->dw55.roi_4_y_top    = generic_state->roi[3].top;
3886         cmd.g9->dw56.roi_4_x_right  = generic_state->roi[3].right;
3887         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3888
3889         if (!generic_state->brc_enabled) {
3890             char tmp = 0;
3891             tmp = generic_state->roi[0].value;
3892             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3893             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3894             tmp = generic_state->roi[1].value;
3895             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3896             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3897             tmp = generic_state->roi[2].value;
3898             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3899             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3900             tmp = generic_state->roi[3].value;
3901             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3902             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3903         } else {
3904             cmd.g9->dw34.roi_enable_flag = 0;
3905         }
3906     }
3907
3908     if (is_g95) {
3909         if (avc_state->tq_enable) {
3910             if (generic_state->frame_type == SLICE_TYPE_I) {
3911                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3912                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3913
3914             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3915                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3916                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3917
3918             } else {
3919                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3920                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3921             }
3922
3923             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3924                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3925
3926             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3927                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3928
3929             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3930                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3931
3932             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3933                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3934         }
3935     }
3936
3937     if (is_g95) {
3938         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3939         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3940         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3941         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3942         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3943         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3944         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3945         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3946         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3947         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3948         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3949         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3950         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3951         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3952         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3953         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3954         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3955         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3956         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3957         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3958         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3959         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3960     }
3961
3962     if (is_g9) {
3963         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3964         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3965         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3966         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3967         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3968         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3969         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3970         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3971         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3972         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3973         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3974         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3975         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3976         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3977         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3978         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3979         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3980         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3981         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3982         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3983         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3984         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3985     }
3986
3987     i965_gpe_context_unmap_curbe(gpe_context);
3988
3989     return;
3990 }
3991
3992 static void
3993 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3994                              struct encode_state *encode_state,
3995                              struct i965_gpe_context *gpe_context,
3996                              struct intel_encoder_context *encoder_context,
3997                              void * param)
3998 {
3999     struct i965_driver_data *i965 = i965_driver_data(ctx);
4000     gen9_avc_fei_mbenc_curbe_data *cmd;
4001     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4002     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4003     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4004     VASurfaceID surface_id;
4005     struct object_surface *obj_surface;
4006     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4007     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
4008     VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4009
4010     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4011     unsigned char qp = 0;
4012     unsigned char me_method = 0;
4013     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4014     unsigned int table_idx = 0;
4015     int ref_width, ref_height, len_sp;
4016     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4017     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4018     unsigned int preset = generic_state->preset;
4019
4020     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4021
4022     assert(gpe_context != NULL);
4023     cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4024     memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
4025
4026     if (mbenc_i_frame_dist_in_use) {
4027         memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4028
4029     } else {
4030         switch (generic_state->frame_type) {
4031         case SLICE_TYPE_I:
4032             memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4033             break;
4034         case SLICE_TYPE_P:
4035             memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4036             break;
4037         case SLICE_TYPE_B:
4038             memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4039             break;
4040         default:
4041             assert(0);
4042         }
4043
4044     }
4045     /* 4 means full search, 6 means diamand search */
4046     me_method  = (fei_param->search_window == 5) ||
4047                  (fei_param->search_window == 8) ? 4 : 6;
4048
4049     ref_width    = fei_param->ref_width;
4050     ref_height   = fei_param->ref_height;
4051     len_sp       = fei_param->len_sp;
4052     /* If there is a serch_window, discard user provided ref_width, ref_height
4053      * and search_path length */
4054     switch (fei_param->search_window) {
4055     case 0:
4056         /*  not use predefined search window, there should be a search_path input */
4057         if ((fei_param->search_path != 0) &&
4058             (fei_param->search_path != 1) &&
4059             (fei_param->search_path != 2)) {
4060             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
4061             assert(0);
4062         }
4063         /* 4 means full search, 6 means diamand search */
4064         me_method = (fei_param->search_path == 1) ? 6 : 4;
4065         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4066             WARN_ONCE("Invalid input ref_width/ref_height in"
4067                       "SearchWindow=0 case! \n");
4068             assert(0);
4069         }
4070         break;
4071
4072     case 1:
4073         /* Tiny - 4 SUs 24x24 window */
4074         ref_width  = 24;
4075         ref_height = 24;
4076         len_sp     = 4;
4077         break;
4078
4079     case 2:
4080         /* Small - 9 SUs 28x28 window */
4081         ref_width  = 28;
4082         ref_height = 28;
4083         len_sp     = 9;
4084         break;
4085     case 3:
4086         /* Diamond - 16 SUs 48x40 window */
4087         ref_width  = 48;
4088         ref_height = 40;
4089         len_sp     = 16;
4090         break;
4091     case 4:
4092         /* Large Diamond - 32 SUs 48x40 window */
4093         ref_width  = 48;
4094         ref_height = 40;
4095         len_sp     = 32;
4096         break;
4097     case 5:
4098         /* Exhaustive - 48 SUs 48x40 window */
4099         ref_width  = 48;
4100         ref_height = 40;
4101         len_sp     = 48;
4102         break;
4103     case 6:
4104         /* Diamond - 16 SUs 64x32 window */
4105         ref_width  = 64;
4106         ref_height = 32;
4107         len_sp     = 16;
4108         break;
4109     case 7:
4110         /* Large Diamond - 32 SUs 64x32 window */
4111         ref_width  = 64;
4112         ref_height = 32;
4113         len_sp     = 32;
4114         break;
4115     case 8:
4116         /* Exhaustive - 48 SUs 64x32 window */
4117         ref_width  = 64;
4118         ref_height = 32;
4119         len_sp     = 48;
4120         break;
4121
4122     default:
4123         assert(0);
4124     }
4125
4126     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
4127     if (is_bframe) {
4128         CLIP(ref_width, 4, 32);
4129         CLIP(ref_height, 4, 32);
4130     } else if (is_pframe) {
4131         CLIP(ref_width, 4, 64);
4132         CLIP(ref_height, 4, 32);
4133     }
4134
4135     cmd->dw0.adaptive_enable =
4136         cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4137     cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4138                                           = avc_state->transform_8x8_mode_enable;
4139     cmd->dw2.max_len_sp = len_sp;
4140     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4141     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4142     cmd->dw3.src_access =
4143         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
4144
4145     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4146         if (avc_state->ftq_override) {
4147             cmd->dw3.ft_enable = avc_state->ftq_enable;
4148         } else {
4149             if (generic_state->frame_type == SLICE_TYPE_P) {
4150                 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4151             } else {
4152                 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4153             }
4154         }
4155     } else {
4156         cmd->dw3.ft_enable = 0;
4157     }
4158
4159     if (avc_state->disable_sub_mb_partion)
4160         cmd->dw3.sub_mb_part_mask = 0x7;
4161
4162     if (mbenc_i_frame_dist_in_use) {
4163         /* Fixme: Not supported, no brc in fei */
4164         assert(0);
4165         cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4166         cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4167         cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4168         cmd->dw6.batch_buffer_end = 0;
4169         cmd->dw31.intra_compute_type = 1;
4170     }
4171
4172     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4173     cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4174     cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4175                                generic_state->frame_height_in_mbs : avc_state->slice_height;
4176     cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4177     cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4178     cmd->dw3.inter_sad = fei_param->inter_sad;
4179     cmd->dw3.Intra_sad = fei_param->intra_sad;
4180     cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4181     cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4182     cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4183         avc_state->sfd_enable && generic_state->hme_enabled;
4184     cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4185     cmd->dw4.constrained_intra_pred_flag =
4186         pic_param->pic_fields.bits.constrained_intra_pred_flag;
4187     cmd->dw4.hme_enable = 0;
4188     cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4189     cmd->dw4.use_actual_ref_qp_value =
4190         generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4191     cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4192     cmd->dw7.src_field_polarity = 0;
4193
4194     /* mv mode cost */
4195     memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4196     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4197         // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4198     } else if (avc_state->skip_bias_adjustment_enable) {
4199         // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4200         // No need to check for P picture as the flag is only enabled for P picture
4201         cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4202     }
4203
4204     //dw16
4205     /* search path tables */
4206     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4207     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4208
4209     //ftq_skip_threshold_lut set,dw14 /15
4210
4211     //r5 disable NonFTQSkipThresholdLUT
4212     if (generic_state->frame_type == SLICE_TYPE_P) {
4213         cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4214     } else if (generic_state->frame_type == SLICE_TYPE_B) {
4215         cmd->dw32.skip_val =
4216             gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4217     }
4218     cmd->dw13.qp_prime_y = qp;
4219     cmd->dw13.qp_prime_cb = qp;
4220     cmd->dw13.qp_prime_cr = qp;
4221     cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
4222
4223     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4224         cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4225         cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4226     } else {
4227         /* disable */
4228         cmd->dw32.mult_pred_l0_disable = 0x80;
4229         cmd->dw32.mult_pred_l1_disable = 0x80;
4230     }
4231     /* no field pic setting, not supported */
4232
4233     //dw34 58
4234     if (avc_state->adaptive_transform_decision_enable) {
4235         if (generic_state->frame_type != SLICE_TYPE_I) {
4236             cmd->dw34.enable_adaptive_tx_decision = 1;
4237         }
4238
4239         cmd->dw58.mb_texture_threshold = 1024;
4240         cmd->dw58.tx_decision_threshold = 128;
4241     }
4242     if (generic_state->frame_type == SLICE_TYPE_B) {
4243         cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4244         cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4245         cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4246     }
4247     cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4248     cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4249     cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4250     cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4251     cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4252     cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4253
4254     if (cmd->dw34.force_non_skip_check) {
4255         cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4256     }
4257     cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4258     cmd->dw38.ref_threshold = 400;
4259     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4260     // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
4261     // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4262     // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4263     cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4264     if (mbenc_i_frame_dist_in_use) {
4265         cmd->dw13.qp_prime_y = 0;
4266         cmd->dw13.qp_prime_cb = 0;
4267         cmd->dw13.qp_prime_cr = 0;
4268         cmd->dw33.intra_16x16_nondc_penalty = 0;
4269         cmd->dw33.intra_8x8_nondc_penalty = 0;
4270         cmd->dw33.intra_4x4_nondc_penalty = 0;
4271     }
4272     if (cmd->dw4.use_actual_ref_qp_value) {
4273         cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4274         cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4275         cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4276         cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4277         cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4278         cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4279         cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4280         cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4281         cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4282         cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4283     }
4284
4285     table_idx = slice_type_kernel[generic_state->frame_type];
4286     cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
4287
4288     if (generic_state->frame_type == SLICE_TYPE_I) {
4289         cmd->dw0.skip_mode_enable = 0;
4290         cmd->dw37.skip_mode_enable = 0;
4291         cmd->dw36.hme_combine_overlap = 0;
4292         cmd->dw36.check_all_fractional_enable = 0;
4293         cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4294         cmd->dw34.enable_direct_bias_adjustment = 0;
4295         cmd->dw34.enable_global_motion_bias_adjustment = 0;
4296
4297     } else if (generic_state->frame_type == SLICE_TYPE_P) {
4298         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4299         cmd->dw3.bme_disable_fbr = 1;
4300         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4301         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4302         cmd->dw7.non_skip_zmv_added = 1;
4303         cmd->dw7.non_skip_mode_added = 1;
4304         cmd->dw7.skip_center_mask = 1;
4305
4306         cmd->dw47.intra_cost_sf =
4307             (avc_state->adaptive_intra_scaling_enable) ?
4308             gen9_avc_adaptive_intra_scaling_factor[preset] :
4309             gen9_avc_intra_scaling_factor[preset];
4310
4311         cmd->dw47.max_vmv_r =
4312             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4313
4314         cmd->dw36.hme_combine_overlap = 1;
4315         cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4316         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4317         cmd->dw34.enable_direct_bias_adjustment = 0;
4318         cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4319         if (avc_state->global_motion_bias_adjustment_enable)
4320             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4321
4322         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4323
4324     } else { /* B slice */
4325
4326         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4327         cmd->dw1.bi_Weight = avc_state->bi_weight;
4328         cmd->dw3.search_ctrl = 7;
4329         cmd->dw3.skip_type = 1;
4330         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4331         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4332         cmd->dw7.skip_center_mask = 0xff;
4333
4334         cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4335                                   gen9_avc_adaptive_intra_scaling_factor[qp] :
4336                                   gen9_avc_intra_scaling_factor[qp];
4337
4338         cmd->dw47.max_vmv_r =
4339             i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4340
4341         cmd->dw36.hme_combine_overlap = 1;
4342
4343         //check is_fwd_frame_short_term_ref
4344         surface_id = slice_param->RefPicList1[0].picture_id;
4345         obj_surface = SURFACE(surface_id);
4346         if (!obj_surface) {
4347             WARN_ONCE("Invalid backward reference frame\n");
4348             if (gpe_context)
4349                 i965_gpe_context_unmap_curbe(gpe_context);
4350             return;
4351         }
4352         cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4353
4354         cmd->dw36.num_ref_idx_l0_minus_one =
4355             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4356             : 0;
4357         cmd->dw36.num_ref_idx_l1_minus_one =
4358             (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4359             : 0;
4360         cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4361
4362         cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4363         cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4364         cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4365         cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4366         cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4367         cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4368         cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4369         cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4370
4371         cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4372         if (cmd->dw34.enable_direct_bias_adjustment) {
4373             cmd->dw7.non_skip_mode_added = 1;
4374             cmd->dw7.non_skip_zmv_added = 1;
4375         }
4376
4377         cmd->dw34.enable_global_motion_bias_adjustment =
4378             avc_state->global_motion_bias_adjustment_enable;
4379         if (avc_state->global_motion_bias_adjustment_enable)
4380             cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4381
4382         cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4383         cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4384     }
4385
4386     avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4387
4388     if (avc_state->rolling_intra_refresh_enable) {
4389         //Not supported
4390         cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4391
4392     } else {
4393         cmd->dw34.widi_intra_refresh_en = 0;
4394     }
4395     cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4396     cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4397
4398     /* Fixme: Skipped ROI stuffs for now */
4399
4400     /* r64: FEI specific fields */
4401     cmd->dw64.fei_enable = 1;
4402     cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4403     if (fei_param->distortion != VA_INVALID_ID)
4404         cmd->dw64.vme_distortion_output_enable = 1;
4405     cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4406     cmd->dw64.mb_input_enable = fei_param->mb_input;
4407
4408     // FEI mode is disabled when external MVP is available
4409     if (fei_param->mv_predictor_enable)
4410         cmd->dw64.fei_mode = 0;
4411     else
4412         cmd->dw64.fei_mode = 1;
4413
4414     cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4415     cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4416     cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4417     cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4418     cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4419     cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4420     cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4421     cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4422     cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4423     cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4424     cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4425     cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4426     cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4427     cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4428     cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4429     cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4430     cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4431     cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4432     cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4433     cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4434     cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4435     cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4436     cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4437     i965_gpe_context_unmap_curbe(gpe_context);
4438
4439     return;
4440 }
4441
/*
 * Bind all input/output surfaces needed by the MBEnc (macroblock encoding)
 * GPU kernel into the GPE context's binding table.
 *
 * The binding-table slot of every surface is fixed by the kernel ABI
 * (GEN9_AVC_MBENC_*_INDEX), so the attachment order/indices below must not
 * be changed.  param_mbenc is a struct mbenc_param describing which optional
 * surfaces (MB BRC constants, MB QP, MAD, VPROC stats, ...) are in use.
 *
 * Returns early (binding nothing further) if a required surface object or
 * its private data is missing.
 */
static void
gen9_avc_send_surface_mbenc(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            void * param_mbenc)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct i965_gpe_resource *gpe_resource;
    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
    VASurfaceID surface_id;
    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
    unsigned int size = 0;
    unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
                                 generic_state->frame_height_in_mbs;
    int i = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    unsigned char is_g95 = 0;

    /* Gen9.5 platforms (KBL/GLK/Gen10) use a slightly different binding
     * table layout for a few surfaces; select the right index set below. */
    if (IS_SKL(i965->intel.device_info) ||
        IS_BXT(i965->intel.device_info))
        is_g95 = 0;
    else if (IS_KBL(i965->intel.device_info) ||
             IS_GEN10(i965->intel.device_info) ||
             IS_GLK(i965->intel.device_info))
        is_g95 = 1;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    /*pak obj command buffer output*/
    size = frame_mb_size * 16 * 4;  /* 16 DWs of PAK object data per MB */
    gpe_resource = &avc_priv_surface->res_mb_code_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);

    /*mv data buffer output*/
    size = frame_mb_size * 32 * 4;  /* 32 DWs of MV data per MB */
    gpe_resource = &avc_priv_surface->res_mv_data_surface;
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                gpe_resource,
                                0,
                                size / 4,
                                0,
                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);

    /*input current  YUV surface, current input Y/UV object*/
    /* In I-frame-distortion mode the kernel works on the 4x-downscaled
     * reconstructed picture instead of the full-size input. */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    /* Y plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_MBENC_CURR_Y_INDEX);

    /* interleaved UV plane */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);

    if (generic_state->hme_enabled) {
        /*memv input 4x*/
        /* HME results: 4x ME motion vectors and distortion.  Gen8 binds
         * them only for non-I frames; Gen9+ binds them unconditionally. */
        if (!IS_GEN8(i965->intel.device_info)) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* memv distortion input*/
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        } else if (generic_state->frame_type != SLICE_TYPE_I) {
            gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
            /* memv distortion input*/
            gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
        }
    }

    /*mbbrc const data_buffer*/
    if (param->mb_const_data_buffer_in_use) {
        size = 16 * AVC_QP_MAX * sizeof(unsigned int);  /* 16 DWs per QP entry */
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);

    }

    /*mb qp data_buffer*/
    if (param->mb_qp_buffer_in_use) {
        /* Application-provided per-MB QP takes precedence over the
         * MB-BRC-generated QP map. */
        if (avc_state->mb_qp_data_enable)
            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
        else
            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MBQP_INDEX);
    }

    /*input current  YUV surface, current input Y/UV object*/
    /* VME source picture for the L0 prediction block (slot IDX_0). */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
    /*input ref YUV surface*/
    /* L0 references occupy every second slot after the current picture. */
    for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
        surface_id = slice_param->RefPicList0[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
    }
    /*input current  YUV surface, current input Y/UV object*/
    /* VME source picture again for the L1 prediction block (slot IDX_1). */
    if (mbenc_i_frame_dist_in_use) {
        obj_surface = encode_state->reconstructed_object;
        if (!obj_surface || !obj_surface->private_data)
            return;
        avc_priv_surface = obj_surface->private_data;
        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }
    i965_add_adv_gpe_surface(ctx, gpe_context,
                             obj_surface,
                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);

    for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
        if (i > 0) break; // only  one ref supported here for B frame
        surface_id = slice_param->RefPicList1[i].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface || !obj_surface->private_data)
            break;

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
        if (i == 0) {
            /* The first backward reference also supplies its MB code and
             * MV data as inputs for direct/temporal prediction. */
            avc_priv_surface = obj_surface->private_data;
            /*pak obj command buffer output(mb code)*/
            size = frame_mb_size * 16 * 4;
            gpe_resource = &avc_priv_surface->res_mb_code_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);

            /*mv data buffer output*/
            size = frame_mb_size * 32 * 4;
            gpe_resource = &avc_priv_surface->res_mv_data_surface;
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);

        }

        if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     obj_surface,
                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
        }

    }

    /* BRC distortion data buffer for I frame*/
    if (mbenc_i_frame_dist_in_use) {
        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
    }

    /* as ref frame ,update later RefPicSelect of Current Picture*/
    obj_surface = encode_state->reconstructed_object;
    avc_priv_surface = obj_surface->private_data;
    if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);

    }
    /* MB statistics input: either the VPROC-generated stats buffer or the
     * flatness-check surface shares the MB_STATS binding slot. */
    if (!IS_GEN8(i965->intel.device_info)) {
        if (param->mb_vproc_stats_enable) {
            /*mb status buffer input*/
            size = frame_mb_size * 16 * 4;
            gpe_resource = &(avc_ctx->res_mb_status_buffer);
            i965_add_buffer_gpe_surface(ctx,
                                        gpe_context,
                                        gpe_resource,
                                        0,
                                        size / 4,
                                        0,
                                        GEN9_AVC_MBENC_MB_STATS_INDEX);

        } else if (avc_state->flatness_check_enable) {
            gpe_resource = &(avc_ctx->res_flatness_check_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           GEN9_AVC_MBENC_MB_STATS_INDEX);
        }
    } else if (avc_state->flatness_check_enable) {
        gpe_resource = &(avc_ctx->res_flatness_check_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
    }

    if (param->mad_enable) {
        /*mad buffer input*/
        /* single DW accumulator, cleared before each kernel run */
        size = 4;
        gpe_resource = &(avc_ctx->res_mad_data_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
        i965_zero_gpe_resource(gpe_resource);
    }

    /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
    if (avc_state->mbenc_brc_buffer_size > 0) {
        size = avc_state->mbenc_brc_buffer_size;
        gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
    }

    /*artitratry num mbs in slice*/
    if (avc_state->arbitrary_num_mbs_in_slice) {
        /*slice surface input*/
        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
        gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
    }

    /* surfaces used only by the full MBEnc kernel (not the I-frame
     * distortion pass): force-non-skip MB map and static frame detection */
    if (!mbenc_i_frame_dist_in_use) {
        if (avc_state->mb_disable_skip_map_enable) {
            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           gpe_resource,
                                           1,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
        }
        if (IS_GEN8(i965->intel.device_info)) {
            if (avc_state->sfd_enable) {
                size = 128 / sizeof(unsigned long);
                gpe_resource = &(avc_ctx->res_sfd_output_buffer);
                i965_add_buffer_gpe_surface(ctx,
                                            gpe_context,
                                            gpe_resource,
                                            0,
                                            size / 4,
                                            0,
                                            GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);

            }
        } else {
            if (avc_state->sfd_enable && generic_state->hme_enabled) {
                /* SFD cost table depends on slice type; I slices bind nothing. */
                if (generic_state->frame_type == SLICE_TYPE_P) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
                } else if (generic_state->frame_type == SLICE_TYPE_B) {
                    gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
                }
                if (generic_state->frame_type != SLICE_TYPE_I) {
                    size = 64;
                    i965_add_buffer_gpe_surface(ctx,
                                                gpe_context,
                                                gpe_resource,
                                                0,
                                                size / 4,
                                                0,
                                                (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));


                }
            }
        }
    }
    return;
}
4812
4813 static void
4814 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4815                                 struct encode_state *encode_state,
4816                                 struct i965_gpe_context *gpe_context,
4817                                 struct intel_encoder_context *encoder_context,
4818                                 void * param_mbenc)
4819 {
4820     struct i965_driver_data *i965 = i965_driver_data(ctx);
4821     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4823     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4824     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4825     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4826     struct object_buffer *obj_buffer = NULL;
4827     struct buffer_store *buffer_store = NULL;
4828     struct object_surface *obj_surface = NULL;
4829     struct gen9_surface_avc *avc_priv_surface;
4830     struct i965_gpe_resource *gpe_resource;
4831     VASurfaceID surface_id;
4832     unsigned int size = 0;
4833     unsigned int frame_mb_nums;
4834     int i = 0, allocate_flag = 1;
4835
4836     obj_surface = encode_state->reconstructed_object;
4837     if (!obj_surface || !obj_surface->private_data)
4838         return;
4839     avc_priv_surface = obj_surface->private_data;
4840
4841     frame_mb_nums = generic_state->frame_width_in_mbs *
4842                     generic_state->frame_height_in_mbs;
4843     fei_param = avc_state->fei_framectl_param;
4844
4845     assert(fei_param != NULL);
4846
4847     /* res_mb_code_surface for MB code */
4848     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4849     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4850         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
4851     if (fei_param->mb_code_data != VA_INVALID_ID) {
4852         obj_buffer = BUFFER(fei_param->mb_code_data);
4853         assert(obj_buffer != NULL);
4854         buffer_store = obj_buffer->buffer_store;
4855         assert(size <= buffer_store->bo->size);
4856         i965_dri_object_to_buffer_gpe_resource(
4857             &avc_priv_surface->res_mb_code_surface,
4858             buffer_store->bo);
4859     } else {
4860         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4861                                                    &avc_priv_surface->res_mb_code_surface,
4862                                                    ALIGN(size, 0x1000),
4863                                                    "mb code buffer");
4864         assert(allocate_flag != 0);
4865     }
4866
4867     /* res_mv_data_surface for MV data */
4868     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4869     if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4870         i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4871     if (fei_param->mv_data != VA_INVALID_ID) {
4872         obj_buffer = BUFFER(fei_param->mv_data);
4873         assert(obj_buffer != NULL);
4874         buffer_store = obj_buffer->buffer_store;
4875         assert(size <= buffer_store->bo->size);
4876         i965_dri_object_to_buffer_gpe_resource(
4877             &avc_priv_surface->res_mv_data_surface,
4878             buffer_store->bo);
4879     } else {
4880         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4881                                                    &avc_priv_surface->res_mv_data_surface,
4882                                                    ALIGN(size, 0x1000),
4883                                                    "mv data buffer");
4884         assert(allocate_flag != 0);
4885     }
4886
4887     /* fei mb control data surface */
4888     size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
4889     if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4890         assert(fei_param->mb_ctrl != VA_INVALID_ID);
4891         obj_buffer = BUFFER(fei_param->mb_ctrl);
4892         assert(obj_buffer != NULL);
4893         buffer_store = obj_buffer->buffer_store;
4894         assert(size <= buffer_store->bo->size);
4895         if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4896             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4897         i965_dri_object_to_buffer_gpe_resource(
4898             &avc_priv_surface->res_fei_mb_cntrl_surface,
4899             buffer_store->bo);
4900     }
4901
4902     /* fei mv predictor surface*/
4903     size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4904     if (fei_param->mv_predictor_enable &&
4905         (fei_param->mv_predictor != VA_INVALID_ID)) {
4906         obj_buffer = BUFFER(fei_param->mv_predictor);
4907         assert(obj_buffer != NULL);
4908         buffer_store = obj_buffer->buffer_store;
4909         assert(size <= buffer_store->bo->size);
4910         if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4911             i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4912         i965_dri_object_to_buffer_gpe_resource(
4913             &avc_priv_surface->res_fei_mv_predictor_surface,
4914             buffer_store->bo);
4915     } else {
4916         if (fei_param->mv_predictor_enable)
4917             assert(fei_param->mv_predictor != VA_INVALID_ID);
4918     }
4919
4920     /* fei vme distortion */
4921     size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4922     if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4923         i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4924     if (fei_param->distortion != VA_INVALID_ID) {
4925         obj_buffer = BUFFER(fei_param->distortion);
4926         assert(obj_buffer != NULL);
4927         buffer_store = obj_buffer->buffer_store;
4928         assert(size <= buffer_store->bo->size);
4929         i965_dri_object_to_buffer_gpe_resource(
4930             &avc_priv_surface->res_fei_vme_distortion_surface,
4931             buffer_store->bo);
4932     } else {
4933         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4934                                                    &avc_priv_surface->res_fei_vme_distortion_surface,
4935                                                    ALIGN(size, 0x1000),
4936                                                    "fei vme distortion");
4937         assert(allocate_flag != 0);
4938     }
4939
4940     /* fei mb qp  */
4941     /* Fixme/Confirm:  not sure why we need 3 byte padding here */
4942     size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4943     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4944         obj_buffer = BUFFER(fei_param->qp);
4945         assert(obj_buffer != NULL);
4946         buffer_store = obj_buffer->buffer_store;
4947         assert((size - 3) <= buffer_store->bo->size);
4948         if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4949             i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4950         i965_dri_object_to_buffer_gpe_resource(
4951             &avc_priv_surface->res_fei_mb_qp_surface,
4952             buffer_store->bo);
4953     } else {
4954         if (fei_param->mb_qp)
4955             assert(fei_param->qp != VA_INVALID_ID);
4956     }
4957
4958     /*==== pak obj command buffer output ====*/
4959     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4960     gpe_resource = &avc_priv_surface->res_mb_code_surface;
4961     i965_add_buffer_gpe_surface(ctx,
4962                                 gpe_context,
4963                                 gpe_resource,
4964                                 0,
4965                                 size / 4,
4966                                 0,
4967                                 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4968
4969
4970     /*=== mv data buffer output */
4971     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4972     gpe_resource = &avc_priv_surface->res_mv_data_surface;
4973     i965_add_buffer_gpe_surface(ctx,
4974                                 gpe_context,
4975                                 gpe_resource,
4976                                 0,
4977                                 size / 4,
4978                                 0,
4979                                 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4980
4981
4982     /* === current input Y (binding table offset = 3)=== */
4983     obj_surface = encode_state->input_yuv_object;
4984     i965_add_2d_gpe_surface(ctx,
4985                             gpe_context,
4986                             obj_surface,
4987                             0,
4988                             1,
4989                             I965_SURFACEFORMAT_R8_UNORM,
4990                             GEN9_AVC_MBENC_CURR_Y_INDEX);
4991
4992     /* === current input UV === (binding table offset == 4)*/
4993     i965_add_2d_gpe_surface(ctx,
4994                             gpe_context,
4995                             obj_surface,
4996                             1,
4997                             1,
4998                             I965_SURFACEFORMAT_R16_UINT,
4999                             GEN9_AVC_MBENC_CURR_UV_INDEX);
5000
5001     /* === input current YUV surface, (binding table offset == 15) === */
5002     i965_add_adv_gpe_surface(ctx, gpe_context,
5003                              obj_surface,
5004                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5005
5006
5007     /*== input current  YUV surface, (binding table offset == 32)*/
5008     i965_add_adv_gpe_surface(ctx, gpe_context,
5009                              obj_surface,
5010                              GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5011
5012     /* list 0 references */
5013     for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5014
5015         surface_id = slice_param->RefPicList0[i].picture_id;
5016         obj_surface = SURFACE(surface_id);
5017         if (!obj_surface || !obj_surface->private_data)
5018             break;
5019         i965_add_adv_gpe_surface(ctx, gpe_context,
5020                                  obj_surface,
5021                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5022     }
5023
5024
5025     /* list 1 references */
5026     for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5027         if (i > 0) break; // only  one ref supported here for B frame
5028         surface_id = slice_param->RefPicList1[i].picture_id;
5029         obj_surface = SURFACE(surface_id);
5030         if (!obj_surface || !obj_surface->private_data)
5031             break;
5032
5033         i965_add_adv_gpe_surface(ctx, gpe_context,
5034                                  obj_surface,
5035                                  GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5036         if (i == 0) {
5037             avc_priv_surface = obj_surface->private_data;
5038             /* mb code of Backward reference frame */
5039             size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5040             gpe_resource = &avc_priv_surface->res_mb_code_surface;
5041             i965_add_buffer_gpe_surface(ctx,
5042                                         gpe_context,
5043                                         gpe_resource,
5044                                         0,
5045                                         size / 4,
5046                                         0,
5047                                         GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5048
5049             /* mv data of backward ref frame */
5050             size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5051             gpe_resource = &avc_priv_surface->res_mv_data_surface;
5052             i965_add_buffer_gpe_surface(ctx,
5053                                         gpe_context,
5054                                         gpe_resource,
5055                                         0,
5056                                         size / 4,
5057                                         0,
5058                                         GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5059
5060         }
5061         //again
5062         if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5063             i965_add_adv_gpe_surface(ctx, gpe_context,
5064                                      obj_surface,
5065                                      GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5066         }
5067     }
5068
5069     /* as ref frame ,update later RefPicSelect of Current Picture*/
5070     obj_surface = encode_state->reconstructed_object;
5071     avc_priv_surface = obj_surface->private_data;
5072     if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5073         gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5074         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5075                                        gpe_resource,
5076                                        1,
5077                                        I965_SURFACEFORMAT_R8_UNORM,
5078                                        GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5079
5080     }
5081
5082
5083     /* mb specific data, macroblock control parameters */
5084     if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5085         (fei_param->mb_ctrl != VA_INVALID_ID)) {
5086         size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5087         gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5088         i965_add_buffer_gpe_surface(ctx,
5089                                     gpe_context,
5090                                     gpe_resource,
5091                                     0,
5092                                     size / 4,
5093                                     0,
5094                                     GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5095     }
5096
5097     /* multi mv predictor surface */
5098     if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5099         size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5100         gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5101         i965_add_buffer_gpe_surface(ctx,
5102                                     gpe_context,
5103                                     gpe_resource,
5104                                     0,
5105                                     size / 4,
5106                                     0,
5107                                     GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5108     }
5109
5110     /* mb qp */
5111     if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5112         size = frame_mb_nums  + 3;
5113         gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5114         i965_add_buffer_gpe_surface(ctx,
5115                                     gpe_context,
5116                                     gpe_resource,
5117                                     0,
5118                                     size / 4,
5119                                     0,
5120                                     GEN9_AVC_MBENC_MBQP_INDEX);
5121     }
5122
5123
5124     /*=== FEI distortion surface ====*/
5125     size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5126     gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5127     i965_add_buffer_gpe_surface(ctx,
5128                                 gpe_context,
5129                                 gpe_resource,
5130                                 0,
5131                                 size / 4,
5132                                 0,
5133                                 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
5134
5135     return;
5136 }
5137
/*
 * Dispatch one MBEnc (macroblock encoding) kernel pass.
 *
 * When i_frame_dist_in_use is true, the BRC I-frame-distortion variant of
 * the kernel is run on the 4x downscaled picture; otherwise the regular
 * MBEnc kernel (normal/performance/quality, or the FEI variant when FEI is
 * enabled) is selected from the kernel mode and frame type and run at full
 * resolution.  CURBE programming is skipped when the BRC-update kernel has
 * already set it (mbenc_curbe_set_in_brc_update).  The kernel is launched
 * through the media-object-walker with a scoreboard dependency pattern
 * chosen per slice type.
 */
static VAStatus
gen9_avc_kernel_mbenc(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context,
                      bool i_frame_dist_in_use)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct i965_gpe_context *gpe_context;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    int media_function = 0;
    int kernel_idx = 0;
    unsigned int mb_const_data_buffer_in_use = 0;
    unsigned int mb_qp_buffer_in_use = 0;
    unsigned int brc_enabled = 0;
    unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
    /* dirty-ROI path is intentionally disabled (trailing "&& (0)") */
    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
    struct mbenc_param param ;

    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
    int mad_enable = 0;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];

    /* decide which auxiliary buffers the kernel will read */
    mb_const_data_buffer_in_use =
        generic_state->mb_brc_enabled ||
        roi_enable ||
        dirty_roi_enable ||
        avc_state->mb_qp_data_enable ||
        avc_state->rolling_intra_refresh_enable;
    mb_qp_buffer_in_use =
        generic_state->mb_brc_enabled ||
        generic_state->brc_roi_enable ||
        avc_state->mb_qp_data_enable;

    if (mbenc_i_frame_dist_in_use) {
        /* BRC I-frame distortion estimation runs on the 4x scaled picture */
        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
        mad_enable = 0;
        brc_enabled = 0;

        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
    } else {
        /* pick the I-frame kernel for the requested quality/speed mode;
         * P/B frames offset from it below */
        switch (generic_state->kernel_mode) {
        case INTEL_ENC_KERNEL_NORMAL : {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
            break;
        }
        case INTEL_ENC_KERNEL_PERFORMANCE : {
            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
            break;
        }
        case INTEL_ENC_KERNEL_QUALITY : {
            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
            break;
        }
        default:
            assert(0);

        }

        /* FEI overrides the mode selection with its own kernel set */
        if (encoder_context->fei_enabled) {
            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
        }

        /* kernels are laid out I, P, B consecutively */
        if (generic_state->frame_type == SLICE_TYPE_P) {
            kernel_idx += 1;
        } else if (generic_state->frame_type == SLICE_TYPE_B) {
            kernel_idx += 2;
        }

        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
        mad_enable = avc_state->mad_enable;
        brc_enabled = generic_state->brc_enabled;

        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
    }

    memset(&param, 0, sizeof(struct mbenc_param));

    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
    param.mad_enable = mad_enable;
    param.brc_enabled = brc_enabled;
    param.roi_enabled = roi_enable;

    if (avc_state->mb_status_supported) {
        param.mb_vproc_stats_enable =  avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
    }

    /* the BRC-update kernel may already have initialized this context */
    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        gpe->context_init(ctx, gpe_context);
    }

    gpe->reset_binding_table(ctx, gpe_context);

    if (!avc_state->mbenc_curbe_set_in_brc_update) {
        /*set curbe here*/
        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
    }

    /* MB brc const data buffer set up*/
    if (mb_const_data_buffer_in_use) {
        // calculate the lambda table, it is kernel controlled trellis quantization, gen95+
        if (avc_state->lambda_table_enable)
            gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);

        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
    }

    /*clear the mad buffer*/
    if (mad_enable) {
        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
    }
    /*send surface*/
    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);

    gpe->setup_interface_data(ctx, gpe_context);

    /*walker setting*/
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));

    kernel_walker_param.use_scoreboard = 1;
    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
    if (mbenc_i_frame_dist_in_use) {
        kernel_walker_param.no_dependency = 1;
    } else {
        /* walker pattern follows intra/inter MB dependencies per slice type */
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            break;
        case SLICE_TYPE_P:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            break;
        case SLICE_TYPE_B:
            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
            if (!slice_param->direct_spatial_mv_pred_flag) {
                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
            }
            break;
        default:
            assert(0);
        }
        kernel_walker_param.no_dependency = 0;
    }

    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);

    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
                                            gpe_context,
                                            media_function,
                                            &media_object_walker_param);
    return VA_STATUS_SUCCESS;
}
5308
5309 /*
5310 me kernle related function
5311 */
5312 static void
5313 gen9_avc_set_curbe_me(VADriverContextP ctx,
5314                       struct encode_state *encode_state,
5315                       struct i965_gpe_context *gpe_context,
5316                       struct intel_encoder_context *encoder_context,
5317                       void * param)
5318 {
5319     gen9_avc_me_curbe_data *curbe_cmd;
5320     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5321     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5322     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5323
5324     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5325
5326     struct me_param * curbe_param = (struct me_param *)param ;
5327     unsigned char  use_mv_from_prev_step = 0;
5328     unsigned char write_distortions = 0;
5329     unsigned char qp_prime_y = 0;
5330     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5331     unsigned char seach_table_idx = 0;
5332     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5333     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5334     unsigned int scale_factor = 0;
5335
5336     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
5337     switch (curbe_param->hme_type) {
5338     case INTEL_ENC_HME_4x : {
5339         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5340         write_distortions = 1;
5341         mv_shift_factor = 2;
5342         scale_factor = 4;
5343         prev_mv_read_pos_factor = 0;
5344         break;
5345     }
5346     case INTEL_ENC_HME_16x : {
5347         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5348         write_distortions = 0;
5349         mv_shift_factor = 2;
5350         scale_factor = 16;
5351         prev_mv_read_pos_factor = 1;
5352         break;
5353     }
5354     case INTEL_ENC_HME_32x : {
5355         use_mv_from_prev_step = 0;
5356         write_distortions = 0;
5357         mv_shift_factor = 1;
5358         scale_factor = 32;
5359         prev_mv_read_pos_factor = 0;
5360         break;
5361     }
5362     default:
5363         assert(0);
5364
5365     }
5366     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5367
5368     if (!curbe_cmd)
5369         return;
5370
5371     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5372     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5373
5374     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5375
5376     curbe_cmd->dw3.sub_pel_mode = 3;
5377     if (avc_state->field_scaling_output_interleaved) {
5378         /*frame set to zero,field specified*/
5379         curbe_cmd->dw3.src_access = 0;
5380         curbe_cmd->dw3.ref_access = 0;
5381         curbe_cmd->dw7.src_field_polarity = 0;
5382     }
5383     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5384     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5385     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5386
5387     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5388     curbe_cmd->dw6.write_distortions = write_distortions;
5389     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5390     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
5391
5392     if (generic_state->frame_type == SLICE_TYPE_B) {
5393         curbe_cmd->dw1.bi_weight = 32;
5394         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5395         me_method = gen9_avc_b_me_method[generic_state->preset];
5396         seach_table_idx = 1;
5397     }
5398
5399     if (generic_state->frame_type == SLICE_TYPE_P ||
5400         generic_state->frame_type == SLICE_TYPE_B)
5401         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5402
5403     curbe_cmd->dw13.ref_streamin_cost = 5;
5404     curbe_cmd->dw13.roi_enable = 0;
5405
5406     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5407     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
5408
5409     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5410
5411     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5412     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5413     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5414     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5415     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5416     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5417     curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5418
5419     i965_gpe_context_unmap_curbe(gpe_context);
5420     return;
5421 }
5422
5423 static void
5424 gen9_avc_send_surface_me(VADriverContextP ctx,
5425                          struct encode_state *encode_state,
5426                          struct i965_gpe_context *gpe_context,
5427                          struct intel_encoder_context *encoder_context,
5428                          void * param)
5429 {
5430     struct i965_driver_data *i965 = i965_driver_data(ctx);
5431
5432     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5433     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5434     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5435     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5436
5437     struct object_surface *obj_surface, *input_surface;
5438     struct gen9_surface_avc *avc_priv_surface;
5439     struct i965_gpe_resource *gpe_resource;
5440     struct me_param * curbe_param = (struct me_param *)param ;
5441
5442     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5443     VASurfaceID surface_id;
5444     int i = 0;
5445
5446     /* all scaled input surface stored in reconstructed_object*/
5447     obj_surface = encode_state->reconstructed_object;
5448     if (!obj_surface || !obj_surface->private_data)
5449         return;
5450     avc_priv_surface = obj_surface->private_data;
5451
5452
5453     switch (curbe_param->hme_type) {
5454     case INTEL_ENC_HME_4x : {
5455         /*memv output 4x*/
5456         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5457         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5458                                        gpe_resource,
5459                                        1,
5460                                        I965_SURFACEFORMAT_R8_UNORM,
5461                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5462
5463         /*memv input 16x*/
5464         if (generic_state->b16xme_enabled) {
5465             gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5466             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5467                                            gpe_resource,
5468                                            1,
5469                                            I965_SURFACEFORMAT_R8_UNORM,
5470                                            GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5471         }
5472         /* brc distortion  output*/
5473         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5474         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5475                                        gpe_resource,
5476                                        1,
5477                                        I965_SURFACEFORMAT_R8_UNORM,
5478                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5479         /* memv distortion output*/
5480         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5481         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5482                                        gpe_resource,
5483                                        1,
5484                                        I965_SURFACEFORMAT_R8_UNORM,
5485                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5486         /*input current down scaled YUV surface*/
5487         obj_surface = encode_state->reconstructed_object;
5488         avc_priv_surface = obj_surface->private_data;
5489         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5490         i965_add_adv_gpe_surface(ctx, gpe_context,
5491                                  input_surface,
5492                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5493         /*input ref scaled YUV surface*/
5494         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5495             surface_id = slice_param->RefPicList0[i].picture_id;
5496             obj_surface = SURFACE(surface_id);
5497             if (!obj_surface || !obj_surface->private_data)
5498                 break;
5499             avc_priv_surface = obj_surface->private_data;
5500
5501             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5502
5503             i965_add_adv_gpe_surface(ctx, gpe_context,
5504                                      input_surface,
5505                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5506         }
5507
5508         obj_surface = encode_state->reconstructed_object;
5509         avc_priv_surface = obj_surface->private_data;
5510         input_surface = avc_priv_surface->scaled_4x_surface_obj;
5511
5512         i965_add_adv_gpe_surface(ctx, gpe_context,
5513                                  input_surface,
5514                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5515
5516         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5517             surface_id = slice_param->RefPicList1[i].picture_id;
5518             obj_surface = SURFACE(surface_id);
5519             if (!obj_surface || !obj_surface->private_data)
5520                 break;
5521             avc_priv_surface = obj_surface->private_data;
5522
5523             input_surface = avc_priv_surface->scaled_4x_surface_obj;
5524
5525             i965_add_adv_gpe_surface(ctx, gpe_context,
5526                                      input_surface,
5527                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5528         }
5529         break;
5530
5531     }
5532     case INTEL_ENC_HME_16x : {
5533         gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5534         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5535                                        gpe_resource,
5536                                        1,
5537                                        I965_SURFACEFORMAT_R8_UNORM,
5538                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5539
5540         if (generic_state->b32xme_enabled) {
5541             gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5542             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5543                                            gpe_resource,
5544                                            1,
5545                                            I965_SURFACEFORMAT_R8_UNORM,
5546                                            GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5547         }
5548
5549         obj_surface = encode_state->reconstructed_object;
5550         avc_priv_surface = obj_surface->private_data;
5551         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5552         i965_add_adv_gpe_surface(ctx, gpe_context,
5553                                  input_surface,
5554                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5555
5556         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5557             surface_id = slice_param->RefPicList0[i].picture_id;
5558             obj_surface = SURFACE(surface_id);
5559             if (!obj_surface || !obj_surface->private_data)
5560                 break;
5561             avc_priv_surface = obj_surface->private_data;
5562
5563             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5564
5565             i965_add_adv_gpe_surface(ctx, gpe_context,
5566                                      input_surface,
5567                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5568         }
5569
5570         obj_surface = encode_state->reconstructed_object;
5571         avc_priv_surface = obj_surface->private_data;
5572         input_surface = avc_priv_surface->scaled_16x_surface_obj;
5573
5574         i965_add_adv_gpe_surface(ctx, gpe_context,
5575                                  input_surface,
5576                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5577
5578         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5579             surface_id = slice_param->RefPicList1[i].picture_id;
5580             obj_surface = SURFACE(surface_id);
5581             if (!obj_surface || !obj_surface->private_data)
5582                 break;
5583             avc_priv_surface = obj_surface->private_data;
5584
5585             input_surface = avc_priv_surface->scaled_16x_surface_obj;
5586
5587             i965_add_adv_gpe_surface(ctx, gpe_context,
5588                                      input_surface,
5589                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5590         }
5591         break;
5592     }
5593     case INTEL_ENC_HME_32x : {
5594         gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5595         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5596                                        gpe_resource,
5597                                        1,
5598                                        I965_SURFACEFORMAT_R8_UNORM,
5599                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5600
5601         obj_surface = encode_state->reconstructed_object;
5602         avc_priv_surface = obj_surface->private_data;
5603         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5604         i965_add_adv_gpe_surface(ctx, gpe_context,
5605                                  input_surface,
5606                                  GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5607
5608         for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5609             surface_id = slice_param->RefPicList0[i].picture_id;
5610             obj_surface = SURFACE(surface_id);
5611             if (!obj_surface || !obj_surface->private_data)
5612                 break;
5613             avc_priv_surface = obj_surface->private_data;
5614
5615             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5616
5617             i965_add_adv_gpe_surface(ctx, gpe_context,
5618                                      input_surface,
5619                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5620         }
5621
5622         obj_surface = encode_state->reconstructed_object;
5623         avc_priv_surface = obj_surface->private_data;
5624         input_surface = avc_priv_surface->scaled_32x_surface_obj;
5625
5626         i965_add_adv_gpe_surface(ctx, gpe_context,
5627                                  input_surface,
5628                                  GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5629
5630         for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5631             surface_id = slice_param->RefPicList1[i].picture_id;
5632             obj_surface = SURFACE(surface_id);
5633             if (!obj_surface || !obj_surface->private_data)
5634                 break;
5635             avc_priv_surface = obj_surface->private_data;
5636
5637             input_surface = avc_priv_surface->scaled_32x_surface_obj;
5638
5639             i965_add_adv_gpe_surface(ctx, gpe_context,
5640                                      input_surface,
5641                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5642         }
5643         break;
5644     }
5645     default:
5646         assert(0);
5647
5648     }
5649 }
5650
5651 static VAStatus
5652 gen9_avc_kernel_me(VADriverContextP ctx,
5653                    struct encode_state *encode_state,
5654                    struct intel_encoder_context *encoder_context,
5655                    int hme_type)
5656 {
5657     struct i965_driver_data *i965 = i965_driver_data(ctx);
5658     struct i965_gpe_table *gpe = &i965->gpe_table;
5659     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5660     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5661     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5662     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5663
5664     struct i965_gpe_context *gpe_context;
5665     struct gpe_media_object_walker_parameter media_object_walker_param;
5666     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5667     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5668     int media_function = 0;
5669     int kernel_idx = 0;
5670     struct me_param param ;
5671     unsigned int scale_factor = 0;
5672
5673     switch (hme_type) {
5674     case INTEL_ENC_HME_4x : {
5675         media_function = INTEL_MEDIA_STATE_4X_ME;
5676         scale_factor = 4;
5677         break;
5678     }
5679     case INTEL_ENC_HME_16x : {
5680         media_function = INTEL_MEDIA_STATE_16X_ME;
5681         scale_factor = 16;
5682         break;
5683     }
5684     case INTEL_ENC_HME_32x : {
5685         media_function = INTEL_MEDIA_STATE_32X_ME;
5686         scale_factor = 32;
5687         break;
5688     }
5689     default:
5690         assert(0);
5691
5692     }
5693
5694     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5695     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5696
5697     /* I frame should not come here.*/
5698     kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5699     gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5700
5701     gpe->context_init(ctx, gpe_context);
5702     gpe->reset_binding_table(ctx, gpe_context);
5703
5704     /*set curbe*/
5705     memset(&param, 0, sizeof(param));
5706     param.hme_type = hme_type;
5707     generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5708
5709     /*send surface*/
5710     generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5711
5712     gpe->setup_interface_data(ctx, gpe_context);
5713
5714     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5715     /* the scaling is based on 8x8 blk level */
5716     kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5717     kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5718     kernel_walker_param.no_dependency = 1;
5719
5720     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5721
5722     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5723                                             gpe_context,
5724                                             media_function,
5725                                             &media_object_walker_param);
5726
5727     return VA_STATUS_SUCCESS;
5728 }
5729
5730 /*
5731 wp related function
5732 */
5733 static void
5734 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5735                       struct encode_state *encode_state,
5736                       struct i965_gpe_context *gpe_context,
5737                       struct intel_encoder_context *encoder_context,
5738                       void * param)
5739 {
5740     gen9_avc_wp_curbe_data *cmd;
5741     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5742     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5743     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5744     struct wp_param * curbe_param = (struct wp_param *)param;
5745
5746     cmd = i965_gpe_context_map_curbe(gpe_context);
5747
5748     if (!cmd)
5749         return;
5750     memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5751     if (curbe_param->ref_list_idx) {
5752         cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5753         cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5754     } else {
5755         cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5756         cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5757     }
5758
5759     cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5760     cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5761
5762     i965_gpe_context_unmap_curbe(gpe_context);
5763
5764 }
5765
5766 static void
5767 gen9_avc_send_surface_wp(VADriverContextP ctx,
5768                          struct encode_state *encode_state,
5769                          struct i965_gpe_context *gpe_context,
5770                          struct intel_encoder_context *encoder_context,
5771                          void * param)
5772 {
5773     struct i965_driver_data *i965 = i965_driver_data(ctx);
5774     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5775     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5776     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5777     struct wp_param * curbe_param = (struct wp_param *)param;
5778     struct object_surface *obj_surface;
5779     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5780     VASurfaceID surface_id;
5781
5782     if (curbe_param->ref_list_idx) {
5783         surface_id = slice_param->RefPicList1[0].picture_id;
5784         obj_surface = SURFACE(surface_id);
5785         if (!obj_surface || !obj_surface->private_data)
5786             avc_state->weighted_ref_l1_enable = 0;
5787         else
5788             avc_state->weighted_ref_l1_enable = 1;
5789     } else {
5790         surface_id = slice_param->RefPicList0[0].picture_id;
5791         obj_surface = SURFACE(surface_id);
5792         if (!obj_surface || !obj_surface->private_data)
5793             avc_state->weighted_ref_l0_enable = 0;
5794         else
5795             avc_state->weighted_ref_l0_enable = 1;
5796     }
5797     if (!obj_surface)
5798         obj_surface = encode_state->reference_objects[0];
5799
5800
5801     i965_add_adv_gpe_surface(ctx, gpe_context,
5802                              obj_surface,
5803                              GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5804
5805     obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5806     i965_add_adv_gpe_surface(ctx, gpe_context,
5807                              obj_surface,
5808                              GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5809 }
5810
5811
5812 static VAStatus
5813 gen9_avc_kernel_wp(VADriverContextP ctx,
5814                    struct encode_state *encode_state,
5815                    struct intel_encoder_context *encoder_context,
5816                    unsigned int list1_in_use)
5817 {
5818     struct i965_driver_data *i965 = i965_driver_data(ctx);
5819     struct i965_gpe_table *gpe = &i965->gpe_table;
5820     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5821     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5822     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5823     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5824
5825     struct i965_gpe_context *gpe_context;
5826     struct gpe_media_object_walker_parameter media_object_walker_param;
5827     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5828     int media_function = INTEL_MEDIA_STATE_ENC_WP;
5829     struct wp_param param;
5830
5831     gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5832
5833     gpe->context_init(ctx, gpe_context);
5834     gpe->reset_binding_table(ctx, gpe_context);
5835
5836     memset(&param, 0, sizeof(param));
5837     param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5838     /*set curbe*/
5839     generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, &param);
5840
5841     /*send surface*/
5842     generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5843
5844     gpe->setup_interface_data(ctx, gpe_context);
5845
5846     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5847     /* the scaling is based on 8x8 blk level */
5848     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5849     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5850     kernel_walker_param.no_dependency = 1;
5851
5852     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5853
5854     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5855                                             gpe_context,
5856                                             media_function,
5857                                             &media_object_walker_param);
5858
5859     return VA_STATUS_SUCCESS;
5860 }
5861
5862
5863 /*
5864 sfd related function
5865 */
5866 static void
5867 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5868                        struct encode_state *encode_state,
5869                        struct i965_gpe_context *gpe_context,
5870                        struct intel_encoder_context *encoder_context,
5871                        void * param)
5872 {
5873     gen9_avc_sfd_curbe_data *cmd;
5874     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5875     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5876     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5877     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5878
5879     cmd = i965_gpe_context_map_curbe(gpe_context);
5880
5881     if (!cmd)
5882         return;
5883     memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
5884
5885     cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5886     cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5887     cmd->dw0.stream_in_type = 7 ;
5888     cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type]  ;
5889     cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5890     cmd->dw0.vdenc_mode_disable = 1 ;
5891
5892     cmd->dw1.hme_stream_in_ref_cost = 5 ;
5893     cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
5894     cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5895
5896     cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5897     cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
5898
5899     cmd->dw3.large_mv_threshold = 128 ;
5900     cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5901     cmd->dw5.zmv_threshold = 4 ;
5902     cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5903     cmd->dw7.min_dist_threshold = 10 ;
5904
5905     if (generic_state->frame_type == SLICE_TYPE_P) {
5906         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5907
5908     } else if (generic_state->frame_type == SLICE_TYPE_B) {
5909         memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5910     }
5911
5912     cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5913     cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
5914     cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5915     cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5916     cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5917     cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5918     cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5919
5920     i965_gpe_context_unmap_curbe(gpe_context);
5921
5922 }
5923
5924 static void
5925 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5926                           struct encode_state *encode_state,
5927                           struct i965_gpe_context *gpe_context,
5928                           struct intel_encoder_context *encoder_context,
5929                           void * param)
5930 {
5931     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5932     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5933     struct i965_gpe_resource *gpe_resource;
5934     int size = 0;
5935
5936     /*HME mv data surface memv output 4x*/
5937     gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5938     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5939                                    gpe_resource,
5940                                    1,
5941                                    I965_SURFACEFORMAT_R8_UNORM,
5942                                    GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5943
5944     /* memv distortion */
5945     gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5946     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5947                                    gpe_resource,
5948                                    1,
5949                                    I965_SURFACEFORMAT_R8_UNORM,
5950                                    GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
5951     /*buffer output*/
5952     size = 32 * 4 * 4;
5953     gpe_resource = &avc_ctx->res_sfd_output_buffer;
5954     i965_add_buffer_gpe_surface(ctx,
5955                                 gpe_context,
5956                                 gpe_resource,
5957                                 0,
5958                                 size / 4,
5959                                 0,
5960                                 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
5961
5962 }
5963
5964 static VAStatus
5965 gen9_avc_kernel_sfd(VADriverContextP ctx,
5966                     struct encode_state *encode_state,
5967                     struct intel_encoder_context *encoder_context)
5968 {
5969     struct i965_driver_data *i965 = i965_driver_data(ctx);
5970     struct i965_gpe_table *gpe = &i965->gpe_table;
5971     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5972     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5973     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5974
5975     struct i965_gpe_context *gpe_context;
5976     struct gpe_media_object_parameter media_object_param;
5977     struct gpe_media_object_inline_data media_object_inline_data;
5978     int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5979     gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5980
5981     gpe->context_init(ctx, gpe_context);
5982     gpe->reset_binding_table(ctx, gpe_context);
5983
5984     /*set curbe*/
5985     generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
5986
5987     /*send surface*/
5988     generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5989
5990     gpe->setup_interface_data(ctx, gpe_context);
5991
5992     memset(&media_object_param, 0, sizeof(media_object_param));
5993     memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5994     media_object_param.pinline_data = &media_object_inline_data;
5995     media_object_param.inline_size = sizeof(media_object_inline_data);
5996
5997     gen9_avc_run_kernel_media_object(ctx, encoder_context,
5998                                      gpe_context,
5999                                      media_function,
6000                                      &media_object_param);
6001
6002     return VA_STATUS_SUCCESS;
6003 }
6004
6005 /**************** PreEnc Scaling *************************************/
6006 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6007  * function to set preenc scaling curbe is the same one using for avc encode
6008         == gen95_avc_set_curbe_scaling4x()
6009  * function to send buffer/surface resources is the same one using for avc encode
6010         == gen9_avc_send_surface_scaling()
6011  */
6012 static VAStatus
6013 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6014                                struct encode_state *encode_state,
6015                                struct intel_encoder_context *encoder_context,
6016                                int hme_type,
6017                                int scale_surface_type)
6018 {
6019     struct i965_driver_data *i965 = i965_driver_data(ctx);
6020     struct i965_gpe_table *gpe = &i965->gpe_table;
6021     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6022     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6023     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6024     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6025     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6026     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6027     VAStatsStatisticsParameter *stat_param = NULL;
6028     struct i965_gpe_context *gpe_context;
6029     struct scaling_param surface_param;
6030     struct object_surface *obj_surface = NULL;
6031     struct gpe_media_object_walker_parameter media_object_walker_param;
6032     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6033     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6034     int media_function = 0;
6035     int kernel_idx = 0;
6036     int enable_statistics_output;
6037
6038     stat_param_h264 = avc_state->stat_param;
6039     assert(stat_param_h264);
6040     stat_param = &stat_param_h264->stats_params;
6041     enable_statistics_output = !stat_param_h264->disable_statistics_output;
6042
6043     memset(&surface_param, 0, sizeof(struct scaling_param));
6044     media_function = INTEL_MEDIA_STATE_4X_SCALING;
6045     kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6046     downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6047     downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6048
6049     surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6050     surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6051     surface_param.output_frame_width = generic_state->frame_width_4x;
6052     surface_param.output_frame_height = generic_state->frame_height_4x;
6053     surface_param.use_4x_scaling  = 1 ;
6054     surface_param.use_16x_scaling = 0 ;
6055     surface_param.use_32x_scaling = 0 ;
6056     surface_param.enable_mb_flatness_check = enable_statistics_output;
6057     surface_param.enable_mb_variance_output = enable_statistics_output;
6058     surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6059     surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
6060
6061     switch (scale_surface_type) {
6062
6063     case  SCALE_CUR_PIC:
6064         surface_param.input_surface = encode_state->input_yuv_object ;
6065         surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6066
6067         if (enable_statistics_output) {
6068             surface_param.pres_mbv_proc_stat_buffer =
6069                 &avc_ctx->preproc_stat_data_out_buffer;
6070             surface_param.mbv_proc_stat_enabled = 1;
6071         } else {
6072             surface_param.mbv_proc_stat_enabled = 0;
6073             surface_param.pres_mbv_proc_stat_buffer = NULL;
6074         }
6075         break;
6076
6077     case SCALE_PAST_REF_PIC:
6078         obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6079         assert(obj_surface);
6080         surface_param.input_surface = obj_surface;
6081         surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6082
6083         if (stat_param->past_ref_stat_buf) {
6084             surface_param.pres_mbv_proc_stat_buffer =
6085                 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6086             surface_param.mbv_proc_stat_enabled = 1;
6087         } else {
6088             surface_param.mbv_proc_stat_enabled = 0;
6089             surface_param.pres_mbv_proc_stat_buffer = NULL;
6090         }
6091         break;
6092
6093     case SCALE_FUTURE_REF_PIC:
6094
6095         obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6096         assert(obj_surface);
6097         surface_param.input_surface = obj_surface;
6098         surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6099
6100         if (stat_param->future_ref_stat_buf) {
6101             surface_param.pres_mbv_proc_stat_buffer =
6102                 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6103             surface_param.mbv_proc_stat_enabled = 1;
6104         } else {
6105             surface_param.mbv_proc_stat_enabled = 0;
6106             surface_param.pres_mbv_proc_stat_buffer = NULL;
6107         }
6108         break;
6109     default :
6110         assert(0);
6111     }
6112
6113     gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6114
6115     gpe->context_init(ctx, gpe_context);
6116     gpe->reset_binding_table(ctx, gpe_context);
6117
6118     generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6119
6120     surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6121     surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6122
6123     /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6124      * VAStatsStatisticsH264 will be used to store the output.  */
6125     surface_param.enable_mb_flatness_check = 0;
6126     generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6127
6128     /* setup the interface data */
6129     gpe->setup_interface_data(ctx, gpe_context);
6130
6131     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6132     /* the scaling is based on 8x8 blk level */
6133     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6134     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6135     kernel_walker_param.no_dependency = 1;
6136
6137     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6138
6139     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6140                                             gpe_context,
6141                                             media_function,
6142                                             &media_object_walker_param);
6143
6144     return VA_STATUS_SUCCESS;
6145 }
6146
/**************** PreEnc HME *************************************/
/* The PreEnc HME stage reuses the kernel-run function from the AVC
 * encode path:       == gen9_avc_kernel_me()
 * Function to set the PreEnc HME curbe: gen9_avc_preenc_set_curbe_me()
 * Function to send HME buffers/surfaces: gen9_avc_preenc_send_surface_me()
 */
6153 static void
6154 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6155                              struct encode_state *encode_state,
6156                              struct i965_gpe_context *gpe_context,
6157                              struct intel_encoder_context *encoder_context,
6158                              void * param)
6159 {
6160     gen9_avc_fei_me_curbe_data *curbe_cmd;
6161     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6162     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6163     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6164     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6165     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6166
6167     struct me_param * curbe_param = (struct me_param *)param ;
6168     unsigned char  use_mv_from_prev_step = 0;
6169     unsigned char write_distortions = 0;
6170     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6171     unsigned char seach_table_idx = 0;
6172     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6173     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6174     unsigned int scale_factor = 0;
6175
6176     switch (curbe_param->hme_type) {
6177     case INTEL_ENC_HME_4x:
6178         use_mv_from_prev_step = 0;
6179         write_distortions = 0;
6180         mv_shift_factor = 2;
6181         scale_factor = 4;
6182         prev_mv_read_pos_factor = 0;
6183         break;
6184
6185     default:
6186         assert(0);
6187     }
6188
6189     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
6190     if (!curbe_cmd)
6191         return;
6192
6193     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6194     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6195
6196     memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6197
6198     curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6199     if (avc_state->field_scaling_output_interleaved) {
6200         /*frame set to zero,field specified*/
6201         curbe_cmd->dw3.src_access = 0;
6202         curbe_cmd->dw3.ref_access = 0;
6203         curbe_cmd->dw7.src_field_polarity = 0;
6204     }
6205     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6206     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6207     curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6208
6209     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6210     curbe_cmd->dw6.write_distortions = write_distortions;
6211     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6212     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
6213
6214     if (generic_state->frame_type == SLICE_TYPE_B) {
6215         curbe_cmd->dw1.bi_weight = 32;
6216         curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6217         me_method = gen9_avc_b_me_method[generic_state->preset];
6218         seach_table_idx = 1;
6219     }
6220
6221     if (generic_state->frame_type == SLICE_TYPE_P ||
6222         generic_state->frame_type == SLICE_TYPE_B)
6223         curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6224
6225     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6226     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
6227
6228     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
6229
6230     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6231     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6232     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6233     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6234     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6235     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6236     curbe_cmd->dw38.reserved = 0;
6237
6238     i965_gpe_context_unmap_curbe(gpe_context);
6239     return;
6240 }
6241
6242 static void
6243 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6244                                 struct encode_state *encode_state,
6245                                 struct i965_gpe_context *gpe_context,
6246                                 struct intel_encoder_context *encoder_context,
6247                                 void * param)
6248 {
6249     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6250     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6251     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6252     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6253     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6254     struct object_surface *input_surface;
6255     struct i965_gpe_resource *gpe_resource;
6256     struct me_param * curbe_param = (struct me_param *)param ;
6257     int i = 0;
6258
6259     /* PreEnc Only supports 4xme */
6260     assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6261
6262     switch (curbe_param->hme_type) {
6263     case INTEL_ENC_HME_4x : {
6264         /*memv output 4x*/
6265         gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6266         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6267                                        gpe_resource,
6268                                        1,
6269                                        I965_SURFACEFORMAT_R8_UNORM,
6270                                        GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6271
6272         /* memv distortion output*/
6273         gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6274         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6275                                        gpe_resource,
6276                                        1,
6277                                        I965_SURFACEFORMAT_R8_UNORM,
6278                                        GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6279
6280         /* brc distortion  output*/
6281         gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6282         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6283                                        gpe_resource,
6284                                        1,
6285                                        I965_SURFACEFORMAT_R8_UNORM,
6286                                        GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6287
6288         /* input past ref scaled YUV surface*/
6289         for (i = 0; i < stat_param->num_past_references; i++) {
6290             /*input current down scaled YUV surface for forward refef */
6291             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6292             i965_add_adv_gpe_surface(ctx, gpe_context,
6293                                      input_surface,
6294                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
6295
6296             input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6297             i965_add_adv_gpe_surface(ctx, gpe_context,
6298                                      input_surface,
6299                                      GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6300         }
6301
6302         /* input future ref scaled YUV surface*/
6303         for (i = 0; i < stat_param->num_future_references; i++) {
6304             /*input current down scaled YUV surface for backward ref */
6305             input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6306             i965_add_adv_gpe_surface(ctx, gpe_context,
6307                                      input_surface,
6308                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6309
6310             input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6311             i965_add_adv_gpe_surface(ctx, gpe_context,
6312                                      input_surface,
6313                                      GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6314         }
6315         break;
6316
6317     }
6318     default:
6319         break;
6320
6321     }
6322 }
6323
6324 /**************** PreEnc PreProc *************************************/
6325 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6326  * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6327  * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
6328  */
6329 static void
6330 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6331                                   struct encode_state *encode_state,
6332                                   struct i965_gpe_context *gpe_context,
6333                                   struct intel_encoder_context *encoder_context,
6334                                   void * param)
6335 {
6336     gen9_avc_preproc_curbe_data *cmd;
6337     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6338     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6339     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6340     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6341     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6342     unsigned char me_method = 0;
6343     unsigned int table_idx = 0;
6344     int ref_width, ref_height, len_sp;
6345     int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6346     int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6347     unsigned int preset = generic_state->preset;
6348
6349     cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6350     if (!cmd)
6351         return;
6352     memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
6353
6354     switch (generic_state->frame_type) {
6355     case SLICE_TYPE_I:
6356         memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6357                sizeof(gen9_avc_preproc_curbe_data));
6358         break;
6359     case SLICE_TYPE_P:
6360         memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6361                sizeof(gen9_avc_preproc_curbe_data));
6362         break;
6363     case SLICE_TYPE_B:
6364         memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6365                sizeof(gen9_avc_preproc_curbe_data));
6366         break;
6367     default:
6368         assert(0);
6369     }
6370     /* 4 means full search, 6 means diamand search */
6371     me_method  = (stat_param_h264->search_window == 5) ||
6372                  (stat_param_h264->search_window == 8) ? 4 : 6;
6373
6374     ref_width    = stat_param_h264->ref_width;
6375     ref_height   = stat_param_h264->ref_height;
6376     len_sp       = stat_param_h264->len_sp;
6377     /* If there is a serch_window, discard user provided ref_width, ref_height
6378      * and search_path length */
6379     switch (stat_param_h264->search_window) {
6380     case 0:
6381         /*  not use predefined search window, there should be a search_path input */
6382         if ((stat_param_h264->search_path != 0) &&
6383             (stat_param_h264->search_path != 1) &&
6384             (stat_param_h264->search_path != 2)) {
6385             WARN_ONCE("Invalid input search_path for SearchWindow=0  \n");
6386             assert(0);
6387         }
6388         /* 4 means full search, 6 means diamand search */
6389         me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6390         if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6391             WARN_ONCE("Invalid input ref_width/ref_height in"
6392                       "SearchWindow=0 case! \n");
6393             assert(0);
6394         }
6395         break;
6396
6397     case 1:
6398         /* Tiny - 4 SUs 24x24 window */
6399         ref_width  = 24;
6400         ref_height = 24;
6401         len_sp     = 4;
6402         break;
6403
6404     case 2:
6405         /* Small - 9 SUs 28x28 window */
6406         ref_width  = 28;
6407         ref_height = 28;
6408         len_sp     = 9;
6409         break;
6410     case 3:
6411         /* Diamond - 16 SUs 48x40 window */
6412         ref_width  = 48;
6413         ref_height = 40;
6414         len_sp     = 16;
6415         break;
6416     case 4:
6417         /* Large Diamond - 32 SUs 48x40 window */
6418         ref_width  = 48;
6419         ref_height = 40;
6420         len_sp     = 32;
6421         break;
6422     case 5:
6423         /* Exhaustive - 48 SUs 48x40 window */
6424         ref_width  = 48;
6425         ref_height = 40;
6426         len_sp     = 48;
6427         break;
6428     case 6:
6429         /* Diamond - 16 SUs 64x32 window */
6430         ref_width  = 64;
6431         ref_height = 32;
6432         len_sp     = 16;
6433         break;
6434     case 7:
6435         /* Large Diamond - 32 SUs 64x32 window */
6436         ref_width  = 64;
6437         ref_height = 32;
6438         len_sp     = 32;
6439         break;
6440     case 8:
6441         /* Exhaustive - 48 SUs 64x32 window */
6442         ref_width  = 64;
6443         ref_height = 32;
6444         len_sp     = 48;
6445         break;
6446
6447     default:
6448         assert(0);
6449     }
6450
6451     /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6452     if (is_bframe) {
6453         CLIP(ref_width, 4, 32);
6454         CLIP(ref_height, 4, 32);
6455     } else if (is_pframe) {
6456         CLIP(ref_width, 4, 64);
6457         CLIP(ref_height, 4, 32);
6458     }
6459
6460     cmd->dw0.adaptive_enable =
6461         cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6462     cmd->dw2.max_len_sp = len_sp;
6463     cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6464     cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6465     cmd->dw3.src_access =
6466         cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is suppoted
6467
6468     if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6469         cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6470     else
6471         cmd->dw3.ft_enable = 0;
6472
6473     cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6474     cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6475     cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6476     cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6477     cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6478     cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6479     cmd->dw4.hme_enable = generic_state->hme_enabled;
6480     cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6481     cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
6482
6483     cmd->dw4.multiple_mv_predictor_per_mb_enable =
6484         (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6485
6486     cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6487     cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6488
6489     cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6490     cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6491
6492     cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
6493
6494     /* mv mode cost */
6495     memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6496
6497     /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6498     memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6499
6500     /* search path tables */
6501     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6502     memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6503
6504     if (stat_param_h264->intra_part_mask  == 0x07)
6505         cmd->dw31.intra_compute_type  = 3;
6506
6507     cmd->dw38.ref_threshold = 400;
6508     cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
6509
6510     if (generic_state->frame_type == SLICE_TYPE_I) {
6511         cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6512         cmd->dw36.hme_combine_overlap = 0;
6513     } else if (generic_state->frame_type == SLICE_TYPE_P) {
6514         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6515         cmd->dw3.bme_disable_fbr = 1;
6516         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6517         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6518         cmd->dw7.non_skip_zmv_added = 1;
6519         cmd->dw7.non_skip_mode_added = 1;
6520         cmd->dw7.skip_center_mask = 1;
6521         cmd->dw32.max_vmv_r =
6522             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6523         cmd->dw36.hme_combine_overlap = 1;
6524
6525     } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6526
6527         cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6528         cmd->dw3.search_ctrl = 0;
6529         cmd->dw3.skip_type = 1;
6530         cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6531         cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6532         cmd->dw7.skip_center_mask = 0xff;
6533         cmd->dw32.max_vmv_r =
6534             i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6535         cmd->dw36.hme_combine_overlap = 1;
6536     }
6537
6538     cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6539     cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6540     cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6541     cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6542     cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6543     cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6544     cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6545     cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6546     cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6547
6548     i965_gpe_context_unmap_curbe(gpe_context);
6549 }
6550
/* Bind all input/output surfaces for the PreEnc PreProc kernel.
 *
 * Binds the raw input YUV (Y and UV planes), the optional HME MV data,
 * the optional per-MB QP / MV-predictor inputs, the MV and statistics
 * output buffers, and the VME current/reference picture surfaces.
 * Returns early (leaving the binding table partially set up) if a
 * reference VASurfaceID does not resolve to an object_surface.
 */
static void
gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct i965_gpe_context *gpe_context,
                                     struct intel_encoder_context *encoder_context,
                                     void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct i965_gpe_resource *gpe_resource;
    VASurfaceID surface_id;
    VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
    VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
    unsigned int size = 0, frame_mb_nums = 0;

    frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;

    /* input yuv surface, Y index */
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_CURR_Y_INDEX);

    /* input yuv surface, UV index */
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            GEN9_AVC_MBENC_CURR_UV_INDEX);


    if (generic_state->hme_enabled) {
        /* HME mv data buffer (4x ME output consumed by PreProc) */
        gpe_resource = &avc_ctx->s4x_memv_data_buffer;
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       gpe_resource,
                                       1,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
    }

    /* mv predictor buffer (only when the app supplies MV predictors) */
    if (stat_param_h264->mv_predictor_ctrl) {
        size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
        gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
    }

    /* MB qp buffer (only for per-MB QP mode; also needs the FTQ LUT) */
    if (stat_param_h264->mb_qp) {
        size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
        gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MBQP_INDEX);

        /* FTQ lookup table: 16 DWs per QP value */
        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
        size = 16 * AVC_QP_MAX * 4;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_FTQ_LUT_INDEX);

    }

    /* mv data output buffer */
    if (!stat_param_h264->disable_mv_output) {
        gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
        size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MV_DATA_INDEX);
    }

    /* statistics output buffer */
    if (!stat_param_h264->disable_statistics_output) {
        gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
        size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
        i965_add_buffer_gpe_surface(ctx,
                                    gpe_context,
                                    gpe_resource,
                                    0,
                                    size / 4,
                                    0,
                                    GEN9_AVC_PREPROC_MB_STATS_INDEX);
    }

    /* vme cur pic y */
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);

    /* vme cur pic y (repeating based on required BTI order for mediakerel)*/
    obj_surface = encode_state->input_yuv_object;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            obj_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);

    /* vme forward ref */
    /* Only supports one past ref */
    if (stat_param->num_past_references > 0) {
        surface_id = stat_param->past_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);

    }

    /* vme future ref */
    /* Only supports one future ref; the same surface is bound twice to
     * satisfy the kernel's expected binding-table layout. */
    if (stat_param->num_future_references > 0) {
        surface_id = stat_param->future_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);

        surface_id = stat_param->future_references[0].picture_id;
        assert(surface_id != VA_INVALID_ID);
        obj_surface = SURFACE(surface_id);
        if (!obj_surface)
            return;
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 obj_surface,
                                 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
    }

    return;

}
6724
6725 static VAStatus
6726 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6727                                struct encode_state *encode_state,
6728                                struct intel_encoder_context *encoder_context)
6729 {
6730     struct i965_driver_data *i965 = i965_driver_data(ctx);
6731     struct i965_gpe_table *gpe = &i965->gpe_table;
6732     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6733     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6734     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6735     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6736     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6737     VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6738     struct i965_gpe_context *gpe_context;
6739     struct gpe_media_object_walker_parameter media_object_walker_param;
6740     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6741     int media_function = INTEL_MEDIA_STATE_PREPROC;
6742     struct i965_gpe_resource *gpe_resource = NULL;
6743     unsigned int * data = NULL;
6744     unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
6745
6746     gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6747     gpe->context_init(ctx, gpe_context);
6748     gpe->reset_binding_table(ctx, gpe_context);
6749
6750     /*set curbe*/
6751     generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6752
6753     /*send surface*/
6754     generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6755
6756     gpe->setup_interface_data(ctx, gpe_context);
6757
6758     /*  Set up FtqLut Buffer if there is QP change within a frame */
6759     if (stat_param_h264->mb_qp) {
6760         gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6761         assert(gpe_resource);
6762         data = i965_map_gpe_resource(gpe_resource);
6763         assert(data);
6764         memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
6765     }
6766
6767     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6768     kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6769     kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6770     kernel_walker_param.no_dependency = 1;
6771
6772     i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6773
6774     gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6775                                             gpe_context,
6776                                             media_function,
6777                                             &media_object_walker_param);
6778
6779     return VA_STATUS_SUCCESS;
6780 }
6781
6782
/*
 * Fill the MbEnc kernel CURBE (constant buffer) for Gen8.
 *
 * @param ctx              VA driver context
 * @param encode_state     current encode state (unused here)
 * @param gpe_context      GPE context whose CURBE is mapped and populated
 * @param encoder_context  encoder context holding the VME/AVC private state
 * @param param            pointer to a struct mbenc_param describing the run
 *
 * The CURBE is seeded from a per-frame-type static init table and then
 * patched field by field from the slice/picture parameters, the preset
 * tuning tables (gen9_avc_*), and the BRC/ROI/intra-refresh state.
 * Returns early (with no CURBE written) if mapping fails or the device
 * is not Gen8.
 */
static void
gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         void * param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    gen8_avc_mbenc_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VASurfaceID surface_id;
    struct object_surface *obj_surface;

    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
    unsigned char qp = 0;
    unsigned char me_method = 0;
    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
    unsigned int table_idx = 0;
    unsigned int curbe_size = 0;

    unsigned int preset = generic_state->preset;
    /* Map the CURBE and seed it from the static init table that matches
     * the current mode (I-frame-distortion pass vs. normal I/P/B). */
    if (IS_GEN8(i965->intel.device_info)) {
        cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
        if (!cmd)
            return;
        curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
        memset(cmd, 0, curbe_size);

        if (mbenc_i_frame_dist_in_use) {
            memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
        } else {
            switch (generic_state->frame_type) {
            case SLICE_TYPE_I:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_P:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
                break;
            case SLICE_TYPE_B:
                memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
                break;
            default:
                assert(0);
            }
        }
    } else {
        /* This setter is Gen8-only; other platforms use their own path. */
        assert(0);

        return;
    }

    me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
    /* Effective slice QP = picture init QP + per-slice delta. */
    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;

    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
    cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
    cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;

    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
    cmd->dw38.max_len_sp = 0;

    cmd->dw3.src_access = 0;
    cmd->dw3.ref_access = 0;

    /* FTQ (fast transform quantization) skip only applies to inter frames. */
    if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
        //disable ftq_override by now.
        if (avc_state->ftq_override) {
            cmd->dw3.ftq_enable = avc_state->ftq_enable;

        } else {
            /* Bit 0 of the preset table is the P flag, bit 1 the B flag. */
            if (generic_state->frame_type == SLICE_TYPE_P) {
                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;

            } else {
                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
            }
        }
    } else {
        cmd->dw3.ftq_enable = 0;
    }

    if (avc_state->disable_sub_mb_partion)
        cmd->dw3.sub_mb_part_mask = 0x7;

    /* Geometry: the I-frame-distortion pass runs on the 4x downscaled
     * surface; the normal pass runs at full MB resolution. */
    if (mbenc_i_frame_dist_in_use) {
        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
        cmd->dw6.batch_buffer_end = 0;
        cmd->dw31.intra_compute_type = 1;
    } else {
        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;

        {
            /* Load the mode/MV cost table for this slice type and QP
             * into dw8..dw15 (8 dwords). */
            memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
            /* NOTE(review): the I-frame/old_mode_cost_enable branch is
             * intentionally empty here (the gen9 path patches costs in
             * this case) — confirm whether Gen8 should too. */
            if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
            } else if (avc_state->skip_bias_adjustment_enable) {
                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
                // No need to check for P picture as the flag is only enabled for P picture */
                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
            }
        }
        /* Search path table: index 1 for B slices, 0 otherwise. */
        table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
        memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
    }
    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
    cmd->dw4.field_parity_flag = 0;//bottom field
    cmd->dw4.enable_cur_fld_idr = 0;//field realted
    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
    cmd->dw4.hme_enable = generic_state->hme_enabled;
    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);

    /* Without 8x8 transform, mask out the 8x8 intra partition. */
    cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
    cmd->dw7.src_field_polarity = 0;//field related

    /*ftq_skip_threshold_lut set,dw14 /15*/

    /*r5 disable NonFTQSkipThresholdLUT*/
    if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
    }

    cmd->dw13.qp_prime_y = qp;
    cmd->dw13.qp_prime_cb = qp;
    cmd->dw13.qp_prime_cr = qp;
    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable

    /* Multi-reference prediction control: 128 disables a list, 1 enables it. */
    if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
        switch (gen9_avc_multi_pred[preset]) {
        case 0:
            cmd->dw32.mult_pred_l0_disable = 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 1:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = 128;
            break;
        case 2:
            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        case 3:
            cmd->dw32.mult_pred_l0_disable = 1;
            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
            break;
        }

    } else {
        cmd->dw32.mult_pred_l0_disable = 128;
        cmd->dw32.mult_pred_l1_disable = 128;
    }

    if (generic_state->frame_type == SLICE_TYPE_B) {
        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
        /* NOTE(review): this assigns list1_ref_id0_frm_field_parity twice;
         * the second store presumably was meant for ref_id1 — confirm
         * against the gen9 setter / kernel interface. */
        cmd->dw34.list1_ref_id0_frm_field_parity = 0;
        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
    }

    cmd->dw34.b_original_bff = 0; //frame only
    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;

    if (cmd->dw34.force_non_skip_check) {
        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
    }

    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
    cmd->dw38.ref_threshold = 400;
    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;

    /* The I-frame-distortion pass forces QP 0 and no intra penalties. */
    if (mbenc_i_frame_dist_in_use) {
        cmd->dw13.qp_prime_y = 0;
        cmd->dw13.qp_prime_cb = 0;
        cmd->dw13.qp_prime_cr = 0;
        cmd->dw33.intra_16x16_nondc_penalty = 0;
        cmd->dw33.intra_8x8_nondc_penalty = 0;
        cmd->dw33.intra_4x4_nondc_penalty = 0;
    }
    if (cmd->dw4.use_actual_ref_qp_value) {
        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
    }

    table_idx = slice_type_kernel[generic_state->frame_type];
    cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
    /* Slice-type-specific fields: I disables skip/ME; P and B configure
     * motion search, MV limits, and bias adjustments. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        cmd->dw0.skip_mode_enable = 0;
        cmd->dw37.skip_mode_enable = 0;
        cmd->dw36.hme_combine_overlap = 0;
        cmd->dw47.intra_cost_sf = 16;
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = 0;

    } else if (generic_state->frame_type == SLICE_TYPE_P) {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw3.bme_disable_fbr = 1;
        cmd->dw5.ref_width = gen9_avc_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_search_y[preset];
        cmd->dw7.non_skip_zmv_added = 1;
        cmd->dw7.non_skip_mode_added = 1;
        cmd->dw7.skip_center_mask = 1;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
        cmd->dw36.hme_combine_overlap = 1;
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_search_y[preset];
        cmd->dw34.enable_direct_bias_adjustment = 0;
        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    } else {
        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
        cmd->dw1.bi_weight = avc_state->bi_weight;
        cmd->dw3.search_ctrl = 7;
        cmd->dw3.skip_type = 1;
        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw7.skip_center_mask = 0xff;
        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
        cmd->dw36.hme_combine_overlap = 1;
        /* B slices need a valid backward (list1) reference surface. */
        surface_id = slice_param->RefPicList1[0].picture_id;
        obj_surface = SURFACE(surface_id);
        if (!obj_surface) {
            WARN_ONCE("Invalid backward reference frame\n");
            return;
        }
        cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
        if (cmd->dw34.enable_direct_bias_adjustment) {
            cmd->dw7.non_skip_zmv_added = 1;
            cmd->dw7.non_skip_mode_added = 1;
        }

        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
        if (avc_state->global_motion_bias_adjustment_enable)
            cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
    }
    /* Propagate the (table-seeded) block-based-skip flag back into state
     * so later stages agree with what the kernel will use. */
    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;

    if (avc_state->rolling_intra_refresh_enable) {
        /*by now disable it*/
        if (generic_state->brc_enabled) {
            cmd->dw4.enable_intra_refresh = false;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
            cmd->dw48.widi_intra_refresh_mbx = 0;
            cmd->dw58.widi_intra_refresh_mby = 0;
        } else {
            cmd->dw4.enable_intra_refresh = true;
            cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
        }
        cmd->dw32.mult_pred_l0_disable = 128;
        /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
         across one P frame to another P frame, as needed by the RollingI algo */
        cmd->dw48.widi_intra_refresh_mbx = 0;
        cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
        cmd->dw48.widi_intra_refresh_qp_delta = 0;

    } else {
        cmd->dw34.widi_intra_refresh_en = 0;
    }

    /*roi set disable by now. 49-56*/
    if (curbe_param->roi_enabled) {
        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;

        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;

        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;

        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;

        cmd->dw36.enable_cabac_work_around = 0;

        /* CBR/VBR BRC overrides per-ROI QP deltas; only apply them in CQP.
         * Each delta is clamped so the resulting QP stays in [0, AVC_QP_MAX]. */
        if (!generic_state->brc_enabled) {
            char tmp = 0;
            tmp = generic_state->roi[0].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_1_dqp_prime_y = tmp;
            tmp = generic_state->roi[1].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_2_dqp_prime_y = tmp;
            tmp = generic_state->roi[2].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_3_dqp_prime_y = tmp;
            tmp = generic_state->roi[3].value;
            CLIP(tmp, -qp, AVC_QP_MAX - qp);
            cmd->dw57.roi_4_dqp_prime_y = tmp;
        } else {
            cmd->dw34.roi_enable_flag = 0;
        }
    }

    /* Binding-table indices for every surface the MbEnc kernel touches. */
    cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
    cmd->dw66.mv_data_surf_index =  GEN8_AVC_MBENC_IND_MV_DATA_CM;
    cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
    cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
    cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
    cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
    cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
    cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
    cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
    cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
    cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
    cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
    cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
    cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
    cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
    cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
    cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
    cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
    cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
    cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
    cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
    cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
7152
7153 static void
7154 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7155                              struct encode_state *encode_state,
7156                              struct i965_gpe_context *gpe_context,
7157                              struct intel_encoder_context *encoder_context,
7158                              void *param)
7159 {
7160     gen8_avc_scaling4x_curbe_data *curbe_cmd;
7161     struct scaling_param *surface_param = (struct scaling_param *)param;
7162
7163     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7164
7165     if (!curbe_cmd)
7166         return;
7167
7168     memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7169
7170     curbe_cmd->dw0.input_picture_width  = surface_param->input_frame_width;
7171     curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7172
7173     curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7174     curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7175
7176     curbe_cmd->dw5.flatness_threshold = 0;
7177     if (surface_param->enable_mb_flatness_check) {
7178         curbe_cmd->dw5.flatness_threshold = 128;
7179         curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7180     }
7181
7182     curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7183     curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7184     curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
7185
7186     if (curbe_cmd->dw6.enable_mb_variance_output ||
7187         curbe_cmd->dw6.enable_mb_pixel_average_output) {
7188         curbe_cmd->dw10.mbv_proc_states_bti_top_field  = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7189         curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7190     }
7191
7192     i965_gpe_context_unmap_curbe(gpe_context);
7193     return;
7194 }
7195
7196 static void
7197 gen8_avc_set_curbe_me(VADriverContextP ctx,
7198                       struct encode_state *encode_state,
7199                       struct i965_gpe_context *gpe_context,
7200                       struct intel_encoder_context *encoder_context,
7201                       void * param)
7202 {
7203     gen8_avc_me_curbe_data *curbe_cmd;
7204     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7205     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7206     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7207
7208     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7209
7210     struct me_param * curbe_param = (struct me_param *)param ;
7211     unsigned char  use_mv_from_prev_step = 0;
7212     unsigned char write_distortions = 0;
7213     unsigned char qp_prime_y = 0;
7214     unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7215     unsigned char seach_table_idx = 0;
7216     unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7217     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7218     unsigned int scale_factor = 0;
7219
7220     qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
7221     switch (curbe_param->hme_type) {
7222     case INTEL_ENC_HME_4x : {
7223         use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7224         write_distortions = 1;
7225         mv_shift_factor = 2;
7226         scale_factor = 4;
7227         prev_mv_read_pos_factor = 0;
7228         break;
7229     }
7230     case INTEL_ENC_HME_16x : {
7231         use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7232         write_distortions = 0;
7233         mv_shift_factor = 2;
7234         scale_factor = 16;
7235         prev_mv_read_pos_factor = 1;
7236         break;
7237     }
7238     case INTEL_ENC_HME_32x : {
7239         use_mv_from_prev_step = 0;
7240         write_distortions = 0;
7241         mv_shift_factor = 1;
7242         scale_factor = 32;
7243         prev_mv_read_pos_factor = 0;
7244         break;
7245     }
7246     default:
7247         assert(0);
7248
7249     }
7250     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
7251
7252     if (!curbe_cmd)
7253         return;
7254
7255     downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7256     downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7257
7258     memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7259
7260     curbe_cmd->dw3.sub_pel_mode = 3;
7261     if (avc_state->field_scaling_output_interleaved) {
7262         /*frame set to zero,field specified*/
7263         curbe_cmd->dw3.src_access = 0;
7264         curbe_cmd->dw3.ref_access = 0;
7265         curbe_cmd->dw7.src_field_polarity = 0;
7266     }
7267     curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7268     curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7269     curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7270
7271     curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7272     curbe_cmd->dw6.write_distortions = write_distortions;
7273     curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7274     curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7275
7276     if (generic_state->frame_type == SLICE_TYPE_B) {
7277         curbe_cmd->dw1.bi_weight = 32;
7278         curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7279         me_method = gen9_avc_b_me_method[generic_state->preset];
7280         seach_table_idx = 1;
7281     }
7282
7283     if (generic_state->frame_type == SLICE_TYPE_P ||
7284         generic_state->frame_type == SLICE_TYPE_B)
7285         curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7286
7287     curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7288     curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
7289
7290     memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
7291
7292     curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7293     curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7294     curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7295     curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7296     curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7297     curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7298     curbe_cmd->dw38.reserved = 0;
7299
7300     i965_gpe_context_unmap_curbe(gpe_context);
7301     return;
7302 }
7303
/*
 * Fill the BRC (bit-rate control) frame-update kernel CURBE for Gen8.
 *
 * @param ctx              VA driver context (unused)
 * @param encode_state     supplies the reconstructed surface (reference flag,
 *                         frame index)
 * @param gpe_context      GPE context whose CURBE is mapped and populated
 * @param encoder_context  encoder context holding the VME/AVC private state
 * @param param            unused
 *
 * Side effect: advances the persistent HRD model in generic_state
 * (brc_init_current_target_buf_full_in_bits), handling buffer overflow
 * and skipped frames, so this must run exactly once per frame.
 */
static void
gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct i965_gpe_context *gpe_context,
                                    struct intel_encoder_context *encoder_context,
                                    void * param)
{
    gen8_avc_frame_brc_update_curbe_data *cmd;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    struct avc_param common_param;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!cmd)
        return;

    memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));

    /* Detect virtual-buffer overflow: wrap the accumulated target and
     * tell the kernel via target_size_flag. */
    cmd->dw5.target_size_flag = 0 ;
    if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
        /*overflow*/
        generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
        cmd->dw5.target_size_flag = 1 ;
    }

    /* Account for frames skipped by the app: report them to the kernel
     * and credit their nominal bit budget back to the buffer model. */
    if (generic_state->skip_frame_enbale) {
        cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
        cmd->dw7.size_skip_frames = generic_state->size_skip_frames;

        generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;

    }
    cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
    cmd->dw1.frame_number = generic_state->seq_frame_number ;
    /* Header bytes already emitted for this frame, converted to bits. */
    cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
    cmd->dw5.cur_frame_type = generic_state->frame_type ;
    cmd->dw5.brc_flag = 0 ;
    cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;

    if (avc_state->multi_pre_enable) {
        cmd->dw5.brc_flag  |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
        cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
    }

    cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
    /* Per-slice-type QP clamps; 0/0 means "no clamping". */
    if (avc_state->min_max_qp_enable) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            cmd->dw6.minimum_qp = avc_state->min_qp_i ;
            cmd->dw6.maximum_qp = avc_state->max_qp_i ;
            break;
        case SLICE_TYPE_P:
            cmd->dw6.minimum_qp = avc_state->min_qp_p ;
            cmd->dw6.maximum_qp = avc_state->max_qp_p ;
            break;
        case SLICE_TYPE_B:
            cmd->dw6.minimum_qp = avc_state->min_qp_b ;
            cmd->dw6.maximum_qp = avc_state->max_qp_b ;
            break;
        }
    } else {
        cmd->dw6.minimum_qp = 0 ;
        cmd->dw6.maximum_qp = 0 ;
    }

    /* Advance the buffer model by one frame's nominal input bits. */
    generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;

    /* AVBR: derive the gradual-adjust frame thresholds and the rate-ratio
     * thresholds from the convergence/accuracy knobs. */
    if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
        cmd->dw3.start_gadj_frame0 = (unsigned int)((10 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw3.start_gadj_frame1 = (unsigned int)((50 *   generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame2 = (unsigned int)((100 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw4.start_gadj_frame3 = (unsigned int)((150 *  generic_state->avbr_convergence) / (double)150);
        cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
        cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
        cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
        cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
        cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
        cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));

    }

    /* NOTE(review): common_param is populated below but never read in this
     * function (the gen9 counterpart presumably passes it to a helper) —
     * confirm whether a call was dropped or this is dead code. */
    memset(&common_param, 0, sizeof(common_param));
    common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
    common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
    common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
    common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
    common_param.frames_per_100s = generic_state->frames_per_100s;
    common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
    common_param.target_bit_rate = generic_state->target_bit_rate;

    i965_gpe_context_unmap_curbe(gpe_context);

    return;
}
7408
7409 /*
7410 kernel related function:init/destroy etc
7411 */
7412 static void
7413 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7414                              struct generic_encoder_context *generic_context,
7415                              struct gen_avc_scaling_context *kernel_context,
7416                              int preenc_enabled)
7417 {
7418     struct i965_driver_data *i965 = i965_driver_data(ctx);
7419     struct i965_gpe_table *gpe = &i965->gpe_table;
7420     struct i965_gpe_context *gpe_context = NULL;
7421     struct encoder_kernel_parameter kernel_param ;
7422     struct encoder_scoreboard_parameter scoreboard_param;
7423     struct i965_kernel common_kernel;
7424
7425     memset(&kernel_param, 0, sizeof(kernel_param));
7426     if (IS_SKL(i965->intel.device_info) ||
7427         IS_BXT(i965->intel.device_info)) {
7428         if (!preenc_enabled) {
7429             kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7430             kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7431         } else {
7432             /* Skylake PreEnc using GEN95/gen10 DS kernel */
7433             kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7434             kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7435         }
7436     } else if (IS_KBL(i965->intel.device_info) ||
7437                IS_GEN10(i965->intel.device_info) ||
7438                IS_GLK(i965->intel.device_info)) {
7439         kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440         kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7441     } else if (IS_GEN8(i965->intel.device_info)) {
7442         kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7443         kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7444     } else
7445         assert(0);
7446
7447     /* 4x scaling kernel*/
7448     kernel_param.sampler_size = 0;
7449
7450     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7451     scoreboard_param.mask = 0xFF;
7452     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7453     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7454     scoreboard_param.walkpat_flag = 0;
7455
7456     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7457     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7458     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7459
7460     memset(&common_kernel, 0, sizeof(common_kernel));
7461
7462     generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7463                                                 generic_context->enc_kernel_size,
7464                                                 INTEL_GENERIC_ENC_SCALING4X,
7465                                                 0,
7466                                                 &common_kernel);
7467
7468     gpe->load_kernels(ctx,
7469                       gpe_context,
7470                       &common_kernel,
7471                       1);
7472
7473     /* PreEnc using only the 4X scaling */
7474     if (preenc_enabled)
7475         return;
7476
7477     /*2x scaling kernel*/
7478     kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7479     kernel_param.inline_data_size = 0;
7480     kernel_param.sampler_size = 0;
7481
7482     gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7483     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7484     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7485
7486     memset(&common_kernel, 0, sizeof(common_kernel));
7487
7488     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7489                                          generic_context->enc_kernel_size,
7490                                          INTEL_GENERIC_ENC_SCALING2X,
7491                                          0,
7492                                          &common_kernel);
7493
7494     gpe->load_kernels(ctx,
7495                       gpe_context,
7496                       &common_kernel,
7497                       1);
7498
7499 }
7500
7501 static void
7502 gen9_avc_kernel_init_me(VADriverContextP ctx,
7503                         struct generic_encoder_context *generic_context,
7504                         struct gen_avc_me_context *kernel_context,
7505                         int preenc_enabled)
7506 {
7507     struct i965_driver_data *i965 = i965_driver_data(ctx);
7508     struct i965_gpe_table *gpe = &i965->gpe_table;
7509     struct i965_gpe_context *gpe_context = NULL;
7510     struct encoder_kernel_parameter kernel_param ;
7511     struct encoder_scoreboard_parameter scoreboard_param;
7512     struct i965_kernel common_kernel;
7513     int i = 0;
7514     unsigned int curbe_size = 0;
7515
7516     if (IS_GEN8(i965->intel.device_info)) {
7517         curbe_size = sizeof(gen8_avc_me_curbe_data);
7518     } else {
7519         if (!preenc_enabled)
7520             curbe_size = sizeof(gen9_avc_me_curbe_data);
7521         else
7522             curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7523     }
7524
7525     kernel_param.curbe_size = curbe_size;
7526     kernel_param.inline_data_size = 0;
7527     kernel_param.sampler_size = 0;
7528
7529     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7530     scoreboard_param.mask = 0xFF;
7531     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7532     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7533     scoreboard_param.walkpat_flag = 0;
7534
7535     /* There is two hme kernel, one for P and other for B frame */
7536     for (i = 0; i < 2; i++) {
7537         gpe_context = &kernel_context->gpe_contexts[i];
7538         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7539         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7540
7541         memset(&common_kernel, 0, sizeof(common_kernel));
7542
7543         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7544                                                     generic_context->enc_kernel_size,
7545                                                     INTEL_GENERIC_ENC_ME,
7546                                                     i,
7547                                                     &common_kernel);
7548
7549         gpe->load_kernels(ctx,
7550                           gpe_context,
7551                           &common_kernel,
7552                           1);
7553     }
7554
7555 }
7556
7557 static void
7558 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7559                              struct generic_encoder_context *generic_context,
7560                              struct gen_avc_preproc_context *kernel_context)
7561 {
7562     struct i965_driver_data *i965 = i965_driver_data(ctx);
7563     struct i965_gpe_table *gpe = &i965->gpe_table;
7564     struct i965_gpe_context *gpe_context = NULL;
7565     struct encoder_kernel_parameter kernel_param ;
7566     struct encoder_scoreboard_parameter scoreboard_param;
7567     struct i965_kernel common_kernel;
7568
7569     kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7570     kernel_param.inline_data_size = 0;
7571     kernel_param.sampler_size = 0;
7572
7573     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7574     scoreboard_param.mask = 0xFF;
7575     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7576     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7577     scoreboard_param.walkpat_flag = 0;
7578
7579     gpe_context = &kernel_context->gpe_contexts;
7580     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7581     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7582
7583     memset(&common_kernel, 0, sizeof(common_kernel));
7584
7585     intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7586                                              generic_context->enc_kernel_size,
7587                                              INTEL_GENERIC_ENC_PREPROC,
7588                                              0,
7589                                              &common_kernel);
7590
7591     gpe->load_kernels(ctx,
7592                       gpe_context,
7593                       &common_kernel,
7594                       1);
7595
7596 }
7597
7598 static void
7599 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7600                            struct generic_encoder_context *generic_context,
7601                            struct gen_avc_mbenc_context *kernel_context,
7602                            int fei_enabled)
7603 {
7604     struct i965_driver_data *i965 = i965_driver_data(ctx);
7605     struct i965_gpe_table *gpe = &i965->gpe_table;
7606     struct i965_gpe_context *gpe_context = NULL;
7607     struct encoder_kernel_parameter kernel_param ;
7608     struct encoder_scoreboard_parameter scoreboard_param;
7609     struct i965_kernel common_kernel;
7610     int i = 0;
7611     unsigned int curbe_size = 0;
7612     unsigned int num_mbenc_kernels = 0;
7613
7614     if (IS_SKL(i965->intel.device_info) ||
7615         IS_BXT(i965->intel.device_info)) {
7616         if (!fei_enabled) {
7617             curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7618             num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7619         } else {
7620             curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7621             num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7622         }
7623     } else if (IS_KBL(i965->intel.device_info) ||
7624                IS_GEN10(i965->intel.device_info) ||
7625                IS_GLK(i965->intel.device_info)) {
7626         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7627         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7628     } else if (IS_GEN8(i965->intel.device_info)) {
7629         curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7630         num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7631     }
7632
7633     assert(curbe_size > 0);
7634     kernel_param.curbe_size = curbe_size;
7635     kernel_param.inline_data_size = 0;
7636     kernel_param.sampler_size = 0;
7637
7638     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7639     scoreboard_param.mask = 0xFF;
7640     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7641     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7642     scoreboard_param.walkpat_flag = 0;
7643
7644     for (i = 0; i < num_mbenc_kernels ; i++) {
7645         gpe_context = &kernel_context->gpe_contexts[i];
7646         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7647         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7648
7649         memset(&common_kernel, 0, sizeof(common_kernel));
7650
7651         generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7652                                                     generic_context->enc_kernel_size,
7653                                                     INTEL_GENERIC_ENC_MBENC,
7654                                                     i,
7655                                                     &common_kernel);
7656
7657         gpe->load_kernels(ctx,
7658                           gpe_context,
7659                           &common_kernel,
7660                           1);
7661     }
7662
7663 }
7664
7665 static void
7666 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7667                          struct generic_encoder_context *generic_context,
7668                          struct gen_avc_brc_context *kernel_context)
7669 {
7670     struct i965_driver_data *i965 = i965_driver_data(ctx);
7671     struct i965_gpe_table *gpe = &i965->gpe_table;
7672     struct i965_gpe_context *gpe_context = NULL;
7673     struct encoder_kernel_parameter kernel_param ;
7674     struct encoder_scoreboard_parameter scoreboard_param;
7675     struct i965_kernel common_kernel;
7676     int num_brc_init_kernels = 0;
7677     int i = 0;
7678
7679     if (IS_GEN8(i965->intel.device_info)) {
7680         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7681     } else {
7682         num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
7683     }
7684
7685     const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7686         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7687         (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7688         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7689         (sizeof(gen8_avc_mbenc_curbe_data)),
7690         0,
7691     };
7692     const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7693         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7694         (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7695         (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7696         ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7697         0,
7698         (sizeof(gen9_avc_mb_brc_curbe_data))
7699     };
7700
7701     kernel_param.inline_data_size = 0;
7702     kernel_param.sampler_size = 0;
7703
7704     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7705     scoreboard_param.mask = 0xFF;
7706     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7707     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7708     scoreboard_param.walkpat_flag = 0;
7709
7710     for (i = 0; i < num_brc_init_kernels; i++) {
7711         if (IS_GEN8(i965->intel.device_info)) {
7712             kernel_param.curbe_size = gen8_brc_curbe_size[i];
7713         } else {
7714             kernel_param.curbe_size = gen9_brc_curbe_size[i];
7715         }
7716         gpe_context = &kernel_context->gpe_contexts[i];
7717         gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7718         gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7719
7720         memset(&common_kernel, 0, sizeof(common_kernel));
7721
7722         intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7723                                              generic_context->enc_kernel_size,
7724                                              INTEL_GENERIC_ENC_BRC,
7725                                              i,
7726                                              &common_kernel);
7727
7728         gpe->load_kernels(ctx,
7729                           gpe_context,
7730                           &common_kernel,
7731                           1);
7732     }
7733
7734 }
7735
7736 static void
7737 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7738                         struct generic_encoder_context *generic_context,
7739                         struct gen_avc_wp_context *kernel_context)
7740 {
7741     struct i965_driver_data *i965 = i965_driver_data(ctx);
7742     struct i965_gpe_table *gpe = &i965->gpe_table;
7743     struct i965_gpe_context *gpe_context = NULL;
7744     struct encoder_kernel_parameter kernel_param ;
7745     struct encoder_scoreboard_parameter scoreboard_param;
7746     struct i965_kernel common_kernel;
7747
7748     kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7749     kernel_param.inline_data_size = 0;
7750     kernel_param.sampler_size = 0;
7751
7752     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7753     scoreboard_param.mask = 0xFF;
7754     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7755     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7756     scoreboard_param.walkpat_flag = 0;
7757
7758     gpe_context = &kernel_context->gpe_contexts;
7759     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7760     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7761
7762     memset(&common_kernel, 0, sizeof(common_kernel));
7763
7764     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7765                                          generic_context->enc_kernel_size,
7766                                          INTEL_GENERIC_ENC_WP,
7767                                          0,
7768                                          &common_kernel);
7769
7770     gpe->load_kernels(ctx,
7771                       gpe_context,
7772                       &common_kernel,
7773                       1);
7774
7775 }
7776
7777 static void
7778 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7779                          struct generic_encoder_context *generic_context,
7780                          struct gen_avc_sfd_context *kernel_context)
7781 {
7782     struct i965_driver_data *i965 = i965_driver_data(ctx);
7783     struct i965_gpe_table *gpe = &i965->gpe_table;
7784     struct i965_gpe_context *gpe_context = NULL;
7785     struct encoder_kernel_parameter kernel_param ;
7786     struct encoder_scoreboard_parameter scoreboard_param;
7787     struct i965_kernel common_kernel;
7788
7789     kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7790     kernel_param.inline_data_size = 0;
7791     kernel_param.sampler_size = 0;
7792
7793     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7794     scoreboard_param.mask = 0xFF;
7795     scoreboard_param.enable = generic_context->use_hw_scoreboard;
7796     scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7797     scoreboard_param.walkpat_flag = 0;
7798
7799     gpe_context = &kernel_context->gpe_contexts;
7800     gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7801     gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7802
7803     memset(&common_kernel, 0, sizeof(common_kernel));
7804
7805     intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7806                                          generic_context->enc_kernel_size,
7807                                          INTEL_GENERIC_ENC_SFD,
7808                                          0,
7809                                          &common_kernel);
7810
7811     gpe->load_kernels(ctx,
7812                       gpe_context,
7813                       &common_kernel,
7814                       1);
7815
7816 }
7817
7818 static void
7819 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7820 {
7821
7822     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7823     struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7824     struct i965_gpe_table *gpe = &i965->gpe_table;
7825
7826     int i = 0;
7827
7828     gen9_avc_free_resources(vme_context);
7829
7830     for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7831         gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7832
7833     for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7834         gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7835
7836     for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7837         gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7838
7839     for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7840         gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7841
7842     gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7843
7844     gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7845
7846     gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
7847
7848 }
7849
7850 /*
7851 vme pipeline
7852 */
7853 static void
7854 gen9_avc_update_parameters(VADriverContextP ctx,
7855                            VAProfile profile,
7856                            struct encode_state *encode_state,
7857                            struct intel_encoder_context *encoder_context)
7858 {
7859     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7860     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7861     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7862     VAEncSequenceParameterBufferH264 *seq_param;
7863     VAEncSliceParameterBufferH264 *slice_param;
7864     VAEncMiscParameterBuffer *fei_misc_param;
7865     int i, j, slice_index;
7866     unsigned int preset = generic_state->preset;
7867     unsigned int fei_enabled = encoder_context->fei_enabled;
7868
7869     /* seq/pic/slice parameter setting */
7870     generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7871     generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7872
7873     avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7874     avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
7875
7876     if (fei_enabled &&
7877         encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl]) {
7878         fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879         avc_state->fei_framectl_param =
7880             (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
7881     }
7882
7883     avc_state->slice_num = 0;
7884     slice_index = 0;
7885     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888             avc_state->slice_param[slice_index] = slice_param;
7889             slice_param++;
7890             slice_index++;
7891             avc_state->slice_num++;
7892         }
7893     }
7894
7895     /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
7896     seq_param = avc_state->seq_param;
7897     slice_param = avc_state->slice_param[0];
7898
7899     generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7900
7901     if (slice_param->slice_type == SLICE_TYPE_I ||
7902         slice_param->slice_type == SLICE_TYPE_SI)
7903         generic_state->frame_type = SLICE_TYPE_I;
7904     else if (slice_param->slice_type == SLICE_TYPE_P)
7905         generic_state->frame_type = SLICE_TYPE_P;
7906     else if (slice_param->slice_type == SLICE_TYPE_B)
7907         generic_state->frame_type = SLICE_TYPE_B;
7908     if (profile == VAProfileH264High)
7909         avc_state->transform_8x8_mode_enable = 0;//work around for high profile to disabel pic_param->pic_fields.bits.transform_8x8_mode_flag
7910     else
7911         avc_state->transform_8x8_mode_enable = 0;
7912
7913     /* rc init*/
7914     if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915         generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916         generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917         generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918         generic_state->frames_per_100s = 3000; /* 30fps */
7919     }
7920
7921     generic_state->gop_size = seq_param->intra_period;
7922     generic_state->gop_ref_distance = seq_param->ip_period;
7923
7924     if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925         generic_state->max_bit_rate = generic_state->target_bit_rate;
7926         generic_state->min_bit_rate = generic_state->target_bit_rate;
7927     }
7928
7929     if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930         gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
7931     }
7932
7933     generic_state->preset = encoder_context->quality_level;
7934     if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935         generic_state->preset = INTEL_PRESET_RT_SPEED;
7936     }
7937     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7938
7939     if (!generic_state->brc_inited) {
7940         generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941         generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942         generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943         generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7944     }
7945
7946
7947     generic_state->curr_pak_pass = 0;
7948     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7949
7950     if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951         generic_state->internal_rate_mode == VA_RC_VBR)
7952         generic_state->brc_enabled = 1;
7953     else
7954         generic_state->brc_enabled = 0;
7955
7956     if (generic_state->brc_enabled &&
7957         (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958          !generic_state->vbv_buffer_size_in_bit ||
7959          !generic_state->max_bit_rate ||
7960          !generic_state->target_bit_rate ||
7961          !generic_state->frames_per_100s)) {
7962         WARN_ONCE("Rate control parameter is required for BRC\n");
7963         generic_state->brc_enabled = 0;
7964     }
7965
7966     if (!generic_state->brc_enabled) {
7967         generic_state->target_bit_rate = 0;
7968         generic_state->max_bit_rate = 0;
7969         generic_state->min_bit_rate = 0;
7970         generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971         generic_state->vbv_buffer_size_in_bit = 0;
7972         generic_state->num_pak_passes = 1;
7973     } else {
7974         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7975     }
7976
7977
7978     generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979     generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980     generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981     generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7982
7983     generic_state->frame_width_4x  = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
7986     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7987
7988     generic_state->frame_width_16x  =  ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989     generic_state->frame_height_16x =  ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990     generic_state->downscaled_width_16x_in_mb  = generic_state->frame_width_16x / 16 ;
7991     generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7992
7993     generic_state->frame_width_32x  = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994     generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995     generic_state->downscaled_width_32x_in_mb  = generic_state->frame_width_32x / 16 ;
7996     generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7997
7998     if (generic_state->hme_supported) {
7999         generic_state->hme_enabled = 1;
8000     } else {
8001         generic_state->hme_enabled = 0;
8002     }
8003
8004     if (generic_state->b16xme_supported) {
8005         generic_state->b16xme_enabled = 1;
8006     } else {
8007         generic_state->b16xme_enabled = 0;
8008     }
8009
8010     if (generic_state->b32xme_supported) {
8011         generic_state->b32xme_enabled = 1;
8012     } else {
8013         generic_state->b32xme_enabled = 0;
8014     }
8015     /* disable HME/16xME if the size is too small */
8016     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017         generic_state->b32xme_supported = 0;
8018         generic_state->b32xme_enabled = 0;
8019         generic_state->b16xme_supported = 0;
8020         generic_state->b16xme_enabled = 0;
8021         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022         generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8023     }
8024     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025         generic_state->b32xme_supported = 0;
8026         generic_state->b32xme_enabled = 0;
8027         generic_state->b16xme_supported = 0;
8028         generic_state->b16xme_enabled = 0;
8029         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030         generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8031     }
8032
8033     if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034         generic_state->b32xme_supported = 0;
8035         generic_state->b32xme_enabled = 0;
8036         generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037         generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8038     }
8039     if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040         generic_state->b32xme_supported = 0;
8041         generic_state->b32xme_enabled = 0;
8042         generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043         generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8044     }
8045
8046     if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047         generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048         generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8049     }
8050     if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051         generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052         generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8053     }
8054
8055 }
8056
/*
 * Validate the user-supplied encode parameters and derive the per-frame
 * internal encoding state (rate-control mode, BRC/HME enables, CAF,
 * flatness check, slice layout, inter rounding values, ...) before any
 * GPE kernel is launched for this frame.
 *
 * ctx             - VA driver context
 * encode_state    - per-frame encode state (unused directly except for
 *                   B-frame distortion scaling below)
 * encoder_context - encoder context holding vme_context and RC mode
 *
 * Returns VA_STATUS_SUCCESS (there is currently no failure path).
 */
static VAStatus
gen9_avc_encode_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int preset = generic_state->preset;
    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    int i = 0;
    /* Effective QP of the first slice; used below to index the adaptive
     * rounding tables. NOTE(review): assumes the result is within the
     * tables' valid range (presumably 0..51) — not range-checked here. */
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    /*avbr init*/
    generic_state->avbr_curracy = 30;
    generic_state->avbr_convergence = 150;

    /* Map the external RC mode onto the internally supported set;
     * anything other than CBR/VBR falls back to CQP. */
    switch (rate_control_mode & 0x7f) {
    case VA_RC_CBR:
        generic_state->internal_rate_mode = VA_RC_CBR;
        break;

    case VA_RC_VBR:
        generic_state->internal_rate_mode = VA_RC_VBR;
        break;

    case VA_RC_CQP:
    default:
        generic_state->internal_rate_mode = VA_RC_CQP;
        break;
    }

    /* BRC (bit-rate control) is active for every mode except NONE/CQP;
     * multiple PAK passes are then allowed for bitstream re-encode. */
    if (rate_control_mode != VA_RC_NONE &&
        rate_control_mode != VA_RC_CQP) {
        generic_state->brc_enabled = 1;
        generic_state->brc_distortion_buffer_supported = 1;
        generic_state->brc_constant_buffer_supported = 1;
        generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
    }

    /*check brc parameter*/
    if (generic_state->brc_enabled) {
        /* Per-MB QP input is mutually exclusive with BRC. */
        avc_state->mb_qp_data_enable = 0;
    }

    /*set the brc init and reset accordingly*/
    if (generic_state->brc_need_reset &&
        (generic_state->brc_distortion_buffer_supported == 0 ||
         rate_control_mode == VA_RC_CQP)) {
        generic_state->brc_need_reset = 0;// not support by CQP
    }
    /* Static frame detection (SFD) only applies to non-I frames outside
     * of CBR/VBR rate control. */
    if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
        avc_state->sfd_enable = 0;
    } else {
        avc_state->sfd_enable = 1;
    }

    /* Clamp the BRC sliding window to at most one second of frames,
     * capped at 60 frames. */
    if (generic_state->frames_per_window_size == 0) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
        generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
    }

    if (generic_state->brc_enabled) {
        /* HME motion estimation assists BRC on P/B frames only. */
        generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
        if (avc_state->min_max_qp_enable) {
            /* With explicit min/max QP bounds a single PAK pass suffices. */
            generic_state->num_pak_passes = 1;
        }
        generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
        generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
    } else {
        generic_state->num_pak_passes = 1;// CQP only one pass
    }

    /* The MbEnc I-frame distortion pass feeds the BRC distortion surface. */
    avc_state->mbenc_i_frame_dist_in_use = 0;
    avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);

    /*ROI must enable mbbrc.*/

    /*CAD check*/
    /* Content-adaptive fractional (CAF) motion search: bit 0 of the preset
     * table enables it for P frames, bit 1 for B frames. */
    if (avc_state->caf_supported) {
        switch (generic_state->frame_type) {
        case SLICE_TYPE_I:
            avc_state->caf_enable = 0;
            break;
        case SLICE_TYPE_P:
            avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
            break;
        case SLICE_TYPE_B:
            avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
            break;
        }

        /* Optionally disable CAF for HD (>= 1280x720) resolutions. */
        if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
            if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
                avc_state->caf_enable = 0;
        }
    }

    avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];

    /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
    if (avc_state->flatness_check_supported) {
        avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
    } else {
        avc_state->flatness_check_enable = 0;
    }

    /* check mb_status_supported/enbale*/
    if (avc_state->adaptive_transform_decision_enable) {
        avc_state->mb_status_enable = 1;
    } else {
        avc_state->mb_status_enable = 0;
    }
    /*slice check,all the slices use the same slice height except the last slice*/
    avc_state->arbitrary_num_mbs_in_slice = 0;
    for (i = 0; i < avc_state->slice_num; i++) {
        if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
            /* Slice does not end on a MB-row boundary. */
            avc_state->arbitrary_num_mbs_in_slice = 1;
            avc_state->slice_height = 1; /* slice height will be ignored by kernel ans here set it as default value */
        } else {
            avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
        }
    }

    if (avc_state->slice_num > 1)
        avc_state->arbitrary_num_mbs_in_slice = 1;

    /* I frames never use HME at any scaling level. */
    if (generic_state->frame_type == SLICE_TYPE_I) {
        generic_state->hme_enabled = 0;
        generic_state->b16xme_enabled = 0;
        generic_state->b32xme_enabled = 0;
    }

    /* B frames need distortion scale factors and the implicit bi-weight. */
    if (generic_state->frame_type == SLICE_TYPE_B) {
        gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
        avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
    }

    /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
    avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
                                             && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;

    /* Quality preset forces trellis quantization and per-MB BRC. */
    if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
        avc_state->tq_enable = 1;
        avc_state->tq_rounding = 6;
        if (generic_state->brc_enabled) {
            generic_state->mb_brc_enabled = 1;
        }
    }

    //check the inter rounding
    /* NOTE(review): 255 presumably equals AVC_INVALID_ROUNDING_VALUE, so
     * the branches below fall through to the preset/adaptive tables —
     * confirm against the header definition. */
    avc_state->rounding_value = 0;
    avc_state->rounding_inter_p = 255;//default
    avc_state->rounding_inter_b = 255; //default
    avc_state->rounding_inter_b_ref = 255; //default

    if (generic_state->frame_type == SLICE_TYPE_P) {
        if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
            if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
                /* QP-adaptive rounding; the table choice depends on
                 * whether the GOP contains B frames. */
                if (generic_state->gop_ref_distance == 1)
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
            } else {
                avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
            }

        } else {
            avc_state->rounding_value = avc_state->rounding_inter_p;
        }
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        /* B frames used as references round differently. */
        if (pic_param->pic_fields.bits.reference_pic_flag) {
            if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
                avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
            else
                avc_state->rounding_value = avc_state->rounding_inter_b_ref;
        } else {
            if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
                if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
                    avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
                else
                    avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
            } else {
                avc_state->rounding_value = avc_state->rounding_inter_b;
            }
        }
    }
    return VA_STATUS_SUCCESS;
}
8248
/*
 * Bind all per-frame GPE resources before running the VME kernels:
 * the reconstructed surface (and its DMV buffers), the input YUV
 * surface, every reference surface, the coded (bitstream/status)
 * buffer, and the L0/L1 reference index mapping tables.
 *
 * Returns VA_STATUS_SUCCESS, or an error from surface allocation /
 * checking, or VA_STATUS_ERROR_INVALID_VALUE when the slice header
 * requests more references than the driver supports.
 */
static VAStatus
gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    VAStatus va_status;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;

    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;

    struct gen9_surface_avc *avc_priv_surface;
    dri_bo *bo;
    struct avc_surface_param surface_param;
    int i, j = 0;
    unsigned char * pdata;

    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    /* Make sure the private AVC surface data (DMV buffers etc.) exists. */
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface,
                                             encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    {
        /* init the member of avc_priv_surface,frame_store_id,qp_value*/
        /* The last two slots of res_direct_mv_buffersr are reserved for
         * the current frame's top/bottom direct-MV buffers; earlier
         * slot pairs hold the references (bound in the loop below). */
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        /* Bottom-field POC is presumably top POC + 1 for progressive
         * content — NOTE(review): confirm for interlaced input. */
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* input YUV surface*/
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces */
    /* Bind each valid reference surface and its top/bottom DMV buffer
     * pair (slots 2*i and 2*i+1); stop at the first empty entry. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface*/
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
            avc_priv_surface->frame_store_id = i;
        } else {
            break;
        }
    }

    /* Encoded bitstream ?*/
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
    i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
    generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
    /* Reserve the last 0x1000-aligned chunk of the coded buffer. */
    generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);

    /*status buffer */
    /* The status report area shares the same bo as the coded buffer. */
    avc_ctx->status_buffer.bo = bo;

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;

    /* Clear the status report region following the segment header. */
    pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
    memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
    dri_bo_unmap(bo);

    //frame id, it is the ref pic id in the reference_objects list.
    /* Derive active reference counts from the picture parameters, letting
     * the slice-level override take precedence when flagged. */
    avc_state->num_refs[0] = 0;
    avc_state->num_refs[1] = 0;
    if (generic_state->frame_type == SLICE_TYPE_P) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (generic_state->frame_type == SLICE_TYPE_B) {
        avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
        avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
            avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }
    }

    if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
        return VA_STATUS_ERROR_INVALID_VALUE;
    if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
        return VA_STATUS_ERROR_INVALID_VALUE;

    /* Map each RefPicList0 entry to its index in reference_objects by
     * matching surface IDs; unmatched entries stay 0. */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
        avc_state->list_ref_idx[0][i] = 0;

        if (i >= avc_state->num_refs[0])
            continue;

        va_pic = &slice_param->RefPicList0[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[0][i] = j;

                break;
            }
        }
    }
    /* Same mapping for RefPicList1 (B frames). */
    for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
        VAPictureH264 *va_pic;

        assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
        avc_state->list_ref_idx[1][i] = 0;

        if (i >= avc_state->num_refs[1])
            continue;

        va_pic = &slice_param->RefPicList1[i];

        for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
            obj_surface = encode_state->reference_objects[j];

            if (obj_surface &&
                obj_surface->bo &&
                obj_surface->base.id == va_pic->picture_id) {

                assert(obj_surface->base.id != VA_INVALID_SURFACE);
                avc_state->list_ref_idx[1][i] = j;

                break;
            }
        }
    }

    return VA_STATUS_SUCCESS;
}
8445
8446 static VAStatus
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448                              struct encode_state *encode_state,
8449                              struct intel_encoder_context *encoder_context)
8450 {
8451     return VA_STATUS_SUCCESS;
8452 }
8453
8454 static VAStatus
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456                               struct encode_state *encode_state,
8457                               struct intel_encoder_context *encoder_context)
8458 {
8459
8460     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8463
8464     /*set this flag when all kernel is finished*/
8465     if (generic_state->brc_enabled) {
8466         generic_state->brc_inited = 1;
8467         generic_state->brc_need_reset = 0;
8468         avc_state->mbenc_curbe_set_in_brc_update = 0;
8469     }
8470     return VA_STATUS_SUCCESS;
8471 }
8472
/*
 * Dispatch the VME GPE kernels for the current frame in their required
 * order: BRC init/reset, downscaling, HME (32x -> 16x -> 4x), SFD,
 * BRC frame/MB update, optional weighted-prediction kernels, and
 * finally MbEnc. The ordering is contractual — e.g. BRC init must
 * precede HME because it resets the BRC distortion surface.
 *
 * Returns VA_STATUS_SUCCESS (kernel launch helpers report no status).
 */
static VAStatus
gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
    int fei_enabled = encoder_context->fei_enabled;

    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
    int sfd_in_use = 0;

    /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
    if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
        gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
    }

    /*down scaling*/
    /* Each deeper scaling level is only produced when the shallower
     * one is supported (32x requires 16x requires 4x). */
    if (generic_state->hme_supported) {
        gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
        if (generic_state->b16xme_supported) {
            gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
            if (generic_state->b32xme_supported) {
                gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
        }
    }

    /*me kernel*/
    /* ME runs coarse-to-fine: 32x seeds 16x, which seeds 4x. */
    if (generic_state->hme_enabled) {
        if (generic_state->b16xme_enabled) {
            if (generic_state->b32xme_enabled) {
                gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
            }
            gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
        }
        gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
    }

    /*call SFD kernel after HME in same command buffer*/
    sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
    sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
    if (sfd_in_use) {
        gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
    }

    /* BRC and MbEnc are included in the same task phase*/
    if (generic_state->brc_enabled) {
        /* I-frame distortion pass of MbEnc feeds the BRC update below. */
        if (avc_state->mbenc_i_frame_dist_in_use) {
            gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
        }
        gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);

        if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
            gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
        }
    }

    /*weight prediction,disable by now */
    /* When explicit weights are flagged but absent, the corresponding
     * picture flag is downgraded here as a fallback for the app. */
    avc_state->weighted_ref_l0_enable = 0;
    avc_state->weighted_ref_l1_enable = 0;
    if (avc_state->weighted_prediction_supported &&
        ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
         (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
        if (slice_param->luma_weight_l0_flag & 1) {
            gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);

        } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
            pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
        }

        if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
            if (slice_param->luma_weight_l1_flag & 1) {
                gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
            } else if (!((slice_param->luma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l0_flag & 1) ||
                         (slice_param->chroma_weight_l1_flag & 1))) {
                pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
            }
        }
    }

    /*mbenc kernel*/
    gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);

    /*ignore the reset vertical line kernel*/

    return VA_STATUS_SUCCESS;
}
8564
8565 static VAStatus
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8567                       VAProfile profile,
8568                       struct encode_state *encode_state,
8569                       struct intel_encoder_context *encoder_context)
8570 {
8571     VAStatus va_status;
8572
8573     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8574
8575     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576     if (va_status != VA_STATUS_SUCCESS)
8577         return va_status;
8578
8579     va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580     if (va_status != VA_STATUS_SUCCESS)
8581         return va_status;
8582
8583     va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584     if (va_status != VA_STATUS_SUCCESS)
8585         return va_status;
8586
8587     va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588     if (va_status != VA_STATUS_SUCCESS)
8589         return va_status;
8590
8591     va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592     if (va_status != VA_STATUS_SUCCESS)
8593         return va_status;
8594
8595     gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8596
8597     return VA_STATUS_SUCCESS;
8598 }
8599
8600 /* Update PreEnc specific parameters */
8601 static VAStatus
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8603                                   VAProfile profile,
8604                                   struct encode_state *encode_state,
8605                                   struct intel_encoder_context *encoder_context)
8606 {
8607     struct i965_driver_data *i965 = i965_driver_data(ctx);
8608     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612     VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613     VAStatsStatisticsParameter *stat_param = NULL;
8614     struct object_buffer *obj_buffer = NULL;
8615     struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616     struct buffer_store *buffer_store = NULL;
8617     unsigned int size = 0, i = 0;
8618     unsigned int frame_mb_nums = 0;
8619
8620     if (!encoder_context->preenc_enabled ||
8621         !encode_state->stat_param_ext ||
8622         !encode_state->stat_param_ext->buffer)
8623         return VA_STATUS_ERROR_OPERATION_FAILED;
8624
8625     stat_param_h264 = avc_state->stat_param =
8626                           (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627     stat_param = &stat_param_h264->stats_params;
8628
8629     /* Assume the frame type based on number of past/future ref frames */
8630     if (!stat_param->num_past_references && !stat_param->num_future_references)
8631         generic_state->frame_type = SLICE_TYPE_I;
8632     else if (stat_param->num_future_references > 0)
8633         generic_state->frame_type = SLICE_TYPE_B;
8634     else
8635         generic_state->frame_type = SLICE_TYPE_P;
8636
8637     generic_state->preset = INTEL_PRESET_RT_SPEED;
8638     generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8639
8640     /* frame width and height */
8641     generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642     generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643     generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644     generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8645
8646     /* 4x downscaled width and height */
8647     generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648     generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649     generic_state->downscaled_width_4x_in_mb  = generic_state->frame_width_4x / 16 ;
8650     generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8651
8652     /* reset hme types for preenc */
8653     if (generic_state->frame_type != SLICE_TYPE_I)
8654         generic_state->hme_enabled = 1;
8655
8656     /* ensure frame width is not too small */
8657     if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658         generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659         generic_state->downscaled_width_4x_in_mb =
8660             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8661     }
8662
8663     /* ensure frame height is not too small*/
8664     if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665         generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666         generic_state->downscaled_height_4x_in_mb =
8667             WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8668     }
8669
8670     /********** Ensure buffer object parameters ********/
8671     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8672
8673     /* mv predictor buffer */
8674     if (stat_param_h264->mv_predictor_ctrl) {
8675         if (stat_param->mv_predictor == VA_INVALID_ID)
8676             goto error;
8677         size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678         obj_buffer = BUFFER(stat_param->mv_predictor);
8679         if (!obj_buffer)
8680             goto error;
8681         buffer_store = obj_buffer->buffer_store;
8682         if (buffer_store->bo->size < size)
8683             goto error;
8684         if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8685             i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8686         i965_dri_object_to_buffer_gpe_resource(
8687             &avc_ctx->preproc_mv_predictor_buffer,
8688             buffer_store->bo);
8689     }
8690
8691     /* MB qp buffer */
8692     if (stat_param_h264->mb_qp) {
8693         if (stat_param->qp == VA_INVALID_ID)
8694             goto error;
8695         size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8696         obj_buffer = BUFFER(stat_param->qp);
8697         buffer_store = obj_buffer->buffer_store;
8698         if (buffer_store->bo->size < size)
8699             goto error;
8700         if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8701             i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8702         i965_dri_object_to_buffer_gpe_resource(
8703             &avc_ctx->preproc_mb_qp_buffer,
8704             buffer_store->bo);
8705     }
8706
8707     /* locate mv and stat buffer */
8708     if (!stat_param_h264->disable_mv_output ||
8709         !stat_param_h264->disable_statistics_output) {
8710
8711         if (!stat_param->outputs)
8712             goto error;
8713
8714         for (i = 0; i < 2 ; i++) {
8715             if (stat_param->outputs[i] != VA_INVALID_ID) {
8716                 obj_buffer = BUFFER(stat_param->outputs[i]);
8717                 switch (obj_buffer->type) {
8718                 case VAStatsMVBufferType:
8719                     obj_buffer_mv = obj_buffer;
8720                     break;
8721                 case VAStatsStatisticsBufferType:
8722                     obj_buffer_stat = obj_buffer;
8723                     break;
8724                 default:
8725                     assert(0);
8726                 }
8727             }
8728             if (!(!stat_param_h264->disable_mv_output &&
8729                   !stat_param_h264->disable_statistics_output))
8730                 break;
8731         }
8732     }
8733     /* mv data output buffer */
8734     if (!stat_param_h264->disable_mv_output && obj_buffer_mv) {
8735         size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8736         buffer_store = obj_buffer_mv->buffer_store;
8737         if (buffer_store->bo->size < size)
8738             goto error;
8739         if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8740             i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8741         i965_dri_object_to_buffer_gpe_resource(
8742             &avc_ctx->preproc_mv_data_out_buffer,
8743             buffer_store->bo);
8744     }
8745     /* statistics output buffer */
8746     if (!stat_param_h264->disable_statistics_output && obj_buffer_stat) {
8747         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8748         buffer_store = obj_buffer_stat->buffer_store;
8749         if (buffer_store->bo->size < size)
8750             goto error;
8751         if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8752             i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8753         i965_dri_object_to_buffer_gpe_resource(
8754             &avc_ctx->preproc_stat_data_out_buffer,
8755             buffer_store->bo);
8756     }
8757
8758     /* past ref stat out buffer */
8759     if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8760         stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8761         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8762         obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8763         buffer_store = obj_buffer->buffer_store;
8764         if (buffer_store->bo->size < size)
8765             goto error;
8766         if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8767             i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8768         i965_dri_object_to_buffer_gpe_resource(
8769             &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8770             buffer_store->bo);
8771     }
8772     /* future ref stat out buffer */
8773     if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8774         stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8775         size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8776         obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8777         buffer_store = obj_buffer->buffer_store;
8778         if (buffer_store->bo->size < size)
8779             goto error;
8780         if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8781             i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8782         i965_dri_object_to_buffer_gpe_resource(
8783             &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8784             buffer_store->bo);
8785     }
8786     return VA_STATUS_SUCCESS;
8787
8788 error:
8789     return VA_STATUS_ERROR_INVALID_BUFFER;
8790 }
8791
/* allocate internal resources required for PreEnc */
8793 static VAStatus
8794 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8795                                             struct encode_state *encode_state,
8796                                             struct intel_encoder_context *encoder_context)
8797 {
8798     struct i965_driver_data *i965 = i965_driver_data(ctx);
8799     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8800     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8801     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8802     unsigned int width  = 0;
8803     unsigned int height  = 0;
8804     unsigned int size  = 0;
8805     int allocate_flag = 1;
8806
8807     /* 4x MEMV data buffer */
8808     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8809     height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8810     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8811     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8812                                                   &avc_ctx->s4x_memv_data_buffer,
8813                                                   width, height,
8814                                                   width,
8815                                                   "4x MEMV data buffer");
8816     if (!allocate_flag)
8817         goto failed_allocation;
8818     i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8819
8820     /*  Output DISTORTION surface from 4x ME */
8821     width = generic_state->downscaled_width_4x_in_mb * 8;
8822     height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8823     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8824     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8825                                                   &avc_ctx->s4x_memv_distortion_buffer,
8826                                                   width, height,
8827                                                   ALIGN(width, 64),
8828                                                   "4x MEMV distortion buffer");
8829     if (!allocate_flag)
8830         goto failed_allocation;
8831     i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8832
8833     /* output BRC DISTORTION surface from 4x ME  */
8834     width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8835     height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8836     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8837     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8838                                                   &avc_ctx->res_brc_dist_data_surface,
8839                                                   width, height,
8840                                                   width,
8841                                                   "brc dist data buffer");
8842     if (!allocate_flag)
8843         goto failed_allocation;
8844     i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8845
8846
8847     /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
8848     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8849     size = 16 * AVC_QP_MAX * 4;
8850     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8851                                                &avc_ctx->res_mbbrc_const_data_buffer,
8852                                                ALIGN(size, 0x1000),
8853                                                "mbbrc const data buffer");
8854     if (!allocate_flag)
8855         goto failed_allocation;
8856     i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8857
8858     /* 4x downscaled surface  */
8859     if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8860         i965_CreateSurfaces(ctx,
8861                             generic_state->frame_width_4x,
8862                             generic_state->frame_height_4x,
8863                             VA_RT_FORMAT_YUV420,
8864                             1,
8865                             &avc_ctx->preenc_scaled_4x_surface_id);
8866         avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8867         if (!avc_ctx->preenc_scaled_4x_surface_obj)
8868             goto failed_allocation;
8869         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8870                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8871     }
8872
8873     /* 4x downscaled past ref surface  */
8874     if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8875         i965_CreateSurfaces(ctx,
8876                             generic_state->frame_width_4x,
8877                             generic_state->frame_height_4x,
8878                             VA_RT_FORMAT_YUV420,
8879                             1,
8880                             &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8881         avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8882             SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8883         if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8884             goto failed_allocation;
8885         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8886                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8887     }
8888
8889     /* 4x downscaled future ref surface  */
8890     if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8891         i965_CreateSurfaces(ctx,
8892                             generic_state->frame_width_4x,
8893                             generic_state->frame_height_4x,
8894                             VA_RT_FORMAT_YUV420,
8895                             1,
8896                             &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8897         avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8898             SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8899         if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8900             goto failed_allocation;
8901         i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8902                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8903     }
8904
8905     /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8906      * the generic AVC Encdoe codepath which allocate status buffer as extension
8907      * to CodedBuffer */
8908     if (!avc_ctx->status_buffer.bo) {
8909         size =
8910             generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8911         size += I965_CODEDBUFFER_HEADER_SIZE;
8912         size += 0x1000;
8913         avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8914                                                  "Dummy Coded Buffer",
8915                                                  size, 64);
8916     }
8917
8918     return VA_STATUS_SUCCESS;
8919
8920 failed_allocation:
8921     return VA_STATUS_ERROR_ALLOCATION_FAILED;
8922 }
8923
8924
8925 static VAStatus
8926 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8927                                struct encode_state *encode_state,
8928                                struct intel_encoder_context *encoder_context)
8929 {
8930     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8931     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8932     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8933     VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8934     VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8935
8936     /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8937      * to avoid repeated scaling of same surfaces */
8938
8939     /* down scaling */
8940     gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8941                                    INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8942     if (stat_param->num_past_references > 0) {
8943         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8944                                        INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8945     }
8946     if (stat_param->num_future_references > 0) {
8947         gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8948                                        INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
8949     }
8950
8951     /* me kernel */
8952     if (generic_state->hme_enabled) {
8953         gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8954     }
8955
8956     /* preproc kernel */
8957     if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8958         gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8959     }
8960
8961     return VA_STATUS_SUCCESS;
8962 }
8963
8964 static VAStatus
8965 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8966                          VAProfile profile,
8967                          struct encode_state *encode_state,
8968                          struct intel_encoder_context *encoder_context)
8969 {
8970     VAStatus va_status;
8971
8972     va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8973     if (va_status != VA_STATUS_SUCCESS)
8974         return va_status;
8975
8976     va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8977     if (va_status != VA_STATUS_SUCCESS)
8978         return va_status;
8979
8980     va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8981     if (va_status != VA_STATUS_SUCCESS)
8982         return va_status;
8983
8984     return VA_STATUS_SUCCESS;
8985 }
8986
8987 static void
8988 gen9_avc_vme_context_destroy(void * context)
8989 {
8990     struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8991     struct generic_encoder_context *generic_ctx;
8992     struct i965_avc_encoder_context *avc_ctx;
8993     struct generic_enc_codec_state *generic_state;
8994     struct avc_enc_state *avc_state;
8995
8996     if (!vme_context)
8997         return;
8998
8999     generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9000     avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9001     generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9002     avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
9003
9004     gen9_avc_kernel_destroy(vme_context);
9005
9006     free(generic_ctx);
9007     free(avc_ctx);
9008     free(generic_state);
9009     free(avc_state);
9010     free(vme_context);
9011     return;
9012
9013 }
9014
9015 static void
9016 gen8_avc_kernel_init(VADriverContextP ctx,
9017                      struct intel_encoder_context *encoder_context)
9018 {
9019     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9020     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9021     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9022     int fei_enabled = encoder_context->fei_enabled;
9023
9024     generic_ctx->get_kernel_header_and_size = fei_enabled ?
9025                                               intel_avc_fei_get_kernel_header_and_size :
9026                                               intel_avc_get_kernel_header_and_size ;
9027     gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9028     gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9029     gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9030     gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9031     gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
9032
9033     //function pointer
9034     generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9035     generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9036     generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9037     generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9038     generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9039     generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9040     generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9041
9042     generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9043     generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9044     generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9045     generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9046     generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9047     generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9048 }
/* Gen9+ kernel initialization. Selects one of three operating modes based on
 * the encoder context flags:
 *   - plain AVC encode: full kernel set (scaling/BRC/ME/MBEnc/WP/SFD),
 *   - FEI encode: only the MBEnc kernel with FEI-specific hooks,
 *   - PreEnc: scaling, ME and preproc kernels.
 * It also installs the per-mode CURBE and surface-setup callbacks, with
 * device-specific 4x-scaling CURBE variants for KBL/GLK/Gen10.
 */
static void
gen9_avc_kernel_init(VADriverContextP ctx,
                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
    int fei_enabled = encoder_context->fei_enabled;
    int preenc_enabled = encoder_context->preenc_enabled;

    /* FEI and PreEnc share the FEI kernel binary layout. */
    generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
                                              intel_avc_fei_get_kernel_header_and_size :
                                              intel_avc_get_kernel_header_and_size ;

    if (!fei_enabled && !preenc_enabled) {
        /* generic AVC Encoder */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
        gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
        gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);

        //function pointer
        generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
        generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
        generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
        generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
        generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
        generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
        generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
        generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
        generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
        generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
        generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
        generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;

        /* Device-specific override of the 4x-scaling CURBE setup.
         * (The SKL/BXT branch re-installs the default gen9 variant.) */
        if (IS_SKL(i965->intel.device_info) ||
            IS_BXT(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
        else if (IS_KBL(i965->intel.device_info) ||
                 IS_GEN10(i965->intel.device_info) ||
                 IS_GLK(i965->intel.device_info))
            generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;

    } else if (fei_enabled) {
        /* FEI AVC Encoding */
        gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
                                   encoder_context->fei_enabled);
        generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
        generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;

    } else {
        /* PreEnc for AVC */
        gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
                                     encoder_context->preenc_enabled);
        gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
                                encoder_context->preenc_enabled);
        gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);

        /* preenc 4x scaling uses the gen95 kernel */
        generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
        generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
        generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;

        generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
        generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
        generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
    }
}
9127
9128 /*
9129 PAK pipeline related function
9130 */
9131 extern int
9132 intel_avc_enc_slice_type_fixup(int slice_type);
9133
/* Allocate resources needed for PAK-only mode (only invoked in FEI encode) */
9135 static VAStatus
9136 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9137                                 struct encode_state *encode_state,
9138                                 struct intel_encoder_context *encoder_context)
9139 {
9140     struct i965_driver_data *i965 = i965_driver_data(ctx);
9141     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9142     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9143     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9144     unsigned int size  = 0;
9145     int allocate_flag = 1;
9146
9147     /*second level batch buffer for image state write when cqp etc*/
9148     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9149     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9150     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9151                                                &avc_ctx->res_image_state_batch_buffer_2nd_level,
9152                                                ALIGN(size, 0x1000),
9153                                                "second levle batch (image state write) buffer");
9154     if (!allocate_flag)
9155         goto failed_allocation;
9156
9157     if (!generic_state->brc_allocated) {
9158         i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
9159         size = 64;//44
9160         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9161                                                    &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9162                                                    ALIGN(size, 0x1000),
9163                                                    "brc pak statistic buffer");
9164         if (!allocate_flag)
9165             goto failed_allocation;
9166     }
9167
9168     return VA_STATUS_SUCCESS;
9169
9170 failed_allocation:
9171     return VA_STATUS_ERROR_ALLOCATION_FAILED;
9172 }
9173
/* Emit the MFX_PIPE_MODE_SELECT command (5 DWs) selecting AVC encode mode
 * on the BCS ring. Stream-out is enabled for every PAK pass except the
 * last one, and the pre-/post-deblocking outputs are enabled according to
 * which of the two deblocking BOs has been allocated. */
static void
gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (MFX_LONG_MODE << 17) |       /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) |
                  (0 << 13) |                   /* Non-VDEnc mode  is 0*/
                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) |                  /* Stream-Out Enable */
                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9)  |    /* Post Deblocking Output */
                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8)  |     /* Pre Deblocking Output */
                  (0 << 7)  |                   /* Scaled surface enable */
                  (0 << 6)  |                   /* Frame statistics stream out enable */
                  (0 << 5)  |                   /* not in stitch mode */
                  (1 << 4)  |                   /* encoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
9214
/* Emit an MFX_SURFACE_STATE command (6 DWs) describing an NV12 (planar 420,
 * interleaved U/V, Y-major tiled) surface backed by @gpe_resource, bound to
 * surface @id. */
static void
gen9_mfc_avc_surface_state(VADriverContextP ctx,
                           struct intel_encoder_context *encoder_context,
                           struct i965_gpe_resource *gpe_resource,
                           int id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, id);
    OUT_BCS_BATCH(batch,
                  ((gpe_resource->height - 1) << 18) |
                  ((gpe_resource->width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) |    /* 420 planar YUV surface */
                  (1 << 27) |                           /* must be 1 for interleave U/V, hardware requirement */
                  ((gpe_resource->pitch - 1) << 3) |    /* pitch */
                  (0 << 2)  |                           /* must be 0 for interleave U/V */
                  (1 << 1)  |                           /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));         /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for U(cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                   /* must be 0 for interleave U/V */
                  (gpe_resource->y_cb_offset));         /* y offset for V(cr); same as cb offset since U/V are interleaved in one plane */

    ADVANCE_BCS_BATCH(batch);
}
9246
/* Emit the MFX_PIPE_BUF_ADDR_STATE command programming every buffer address
 * the AVC PAK pipeline uses: deblocking outputs, uncompressed input, PAK MB
 * status (read and write), row-store scratch buffers and the reference
 * picture list. The command is 65 DWs, extended to 68 DWs on Gen10 (CNL). */
static void
gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;
    unsigned int cmd_len = 65;

    if (IS_GEN10(i965->intel.device_info))
        cmd_len = 68;

    BEGIN_BCS_BATCH(batch, cmd_len);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));

    /* the DW1-3 is for pre_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW4-6 is for the post_deblocking */
    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);

    /* the DW10-12 is for PAK information (write) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures (2 DWs per entry) */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
    }

    /* DW 51, reference picture attributes */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for PAK information (read) */
    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /* DW 61, memory compress enable & mode */
    OUT_BCS_BATCH(batch, 0);

    /* the DW 62-64 is the buffer */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);

    /*65-67 for CNL */
    if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
9315
/*
 * Program MFX_IND_OBJ_BASE_ADDR_STATE for AVC PAK.
 *
 * Sets up the indirect object base addresses the MFX engine reads/writes
 * during encoding: the per-MB MV data surface of the reconstructed frame
 * (as input) and the compressed bitstream buffer (as output).  The
 * bitstream offset itself is programmed later in MFX_AVC_SLICE_STATE.
 *
 * Silently returns if the reconstructed surface or its private encoder
 * data is missing.
 */
static void
gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;
    unsigned int size = 0;
    unsigned int w_mb = generic_state->frame_width_in_mbs;
    unsigned int h_mb = generic_state->frame_height_in_mbs;

    obj_surface = encode_state->reconstructed_object;

    if (!obj_surface || !obj_surface->private_data)
        return;
    avc_priv_surface = obj_surface->private_data;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* The DW1-5 is for the MFX indirect bitstream offset; not used as input here */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address.
     * 32 DWs (128 bytes) of MV data per macroblock; the upper bound is
     * page-aligned as required by the address range fields. */
    size = w_mb * h_mb * 32 * 4;
    OUT_BUFFER_3DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   avc_priv_surface->res_mv_data_surface.bo,
                   1,
                   ALIGN(size, 0x1000));

    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
    OUT_BUFFER_2DW(batch, NULL, 0, 0);

    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
     * Note: an offset is specified in MFX_AVC_SLICE_STATE
     */
    OUT_BUFFER_3DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   0,
                   i965->intel.mocs_state);
    OUT_BUFFER_2DW(batch,
                   generic_ctx->compressed_bitstream.res.bo,
                   1,
                   generic_ctx->compressed_bitstream.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
9380
9381 static void
9382 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9383 {
9384     struct i965_driver_data *i965 = i965_driver_data(ctx);
9385     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9386     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9387     struct intel_batchbuffer *batch = encoder_context->base.batch;
9388
9389     BEGIN_BCS_BATCH(batch, 10);
9390
9391     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9392
9393     /* The DW1-3 is for bsd/mpc row store scratch buffer */
9394     OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9395
9396     /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9397     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9398
9399     /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9400     OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9401
9402     ADVANCE_BCS_BATCH(batch);
9403 }
9404
/*
 * Program MFX_AVC_DIRECTMODE_STATE (71 DWs).
 *
 * Provides the direct-mode MV buffers for all reference frames and the
 * current frame, followed by the POC list the hardware uses for B-slice
 * direct/temporal scaling.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference.
     * Each entry is a 64-bit GPU address (2 DWs); unset slots emit 0. */
    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory object control state for the reference DMV buffers */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current frame (written by the hardware,
     * hence the write domain on the relocation) */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC list: 32 reference entries followed by the two entries reserved
     * for the current frame at the tail of top_field_poc[] */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
    }
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);

    ADVANCE_BCS_BATCH(batch);
}
9452
9453 static void
9454 gen9_mfc_qm_state(VADriverContextP ctx,
9455                   int qm_type,
9456                   const unsigned int *qm,
9457                   int qm_length,
9458                   struct intel_encoder_context *encoder_context)
9459 {
9460     struct intel_batchbuffer *batch = encoder_context->base.batch;
9461     unsigned int qm_buffer[16];
9462
9463     assert(qm_length <= 16);
9464     assert(sizeof(*qm) == 4);
9465     memset(qm_buffer, 0, 16 * 4);
9466     memcpy(qm_buffer, qm, qm_length * 4);
9467
9468     BEGIN_BCS_BATCH(batch, 18);
9469     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9470     OUT_BCS_BATCH(batch, qm_type << 0);
9471     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9472     ADVANCE_BCS_BATCH(batch);
9473 }
9474
9475 static void
9476 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9477                       struct encode_state *encode_state,
9478                       struct intel_encoder_context *encoder_context)
9479 {
9480     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9481     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9482     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
9483     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
9484
9485
9486     const unsigned int *qm_4x4_intra;
9487     const unsigned int *qm_4x4_inter;
9488     const unsigned int *qm_8x8_intra;
9489     const unsigned int *qm_8x8_inter;
9490
9491     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9492         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9493         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9494     } else {
9495         VAIQMatrixBufferH264 *qm;
9496         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9497         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9498         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9499         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9500         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9501         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
9502     }
9503
9504     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9505     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9506     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9507     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
9508 }
9509
9510 static void
9511 gen9_mfc_fqm_state(VADriverContextP ctx,
9512                    int fqm_type,
9513                    const unsigned int *fqm,
9514                    int fqm_length,
9515                    struct intel_encoder_context *encoder_context)
9516 {
9517     struct intel_batchbuffer *batch = encoder_context->base.batch;
9518     unsigned int fqm_buffer[32];
9519
9520     assert(fqm_length <= 32);
9521     assert(sizeof(*fqm) == 4);
9522     memset(fqm_buffer, 0, 32 * 4);
9523     memcpy(fqm_buffer, fqm, fqm_length * 4);
9524
9525     BEGIN_BCS_BATCH(batch, 34);
9526     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9527     OUT_BCS_BATCH(batch, fqm_type << 0);
9528     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9529     ADVANCE_BCS_BATCH(batch);
9530 }
9531
/*
 * Convert an 8-bit quantizer matrix to the 16-bit fixed-point forward
 * quantizer ("FQM") format used by MFX_FQM_STATE.
 *
 * Each output entry is (1 << 16) / qm-value, and the matrix is
 * transposed on the fly: fqm is written row-major from qm read
 * column-major.
 *
 * Fix: a scaling value of 1 yields (1 << 16) == 65536, which previously
 * truncated to 0 when stored in the uint16_t destination — producing a
 * zero weight instead of the maximum one.  Clamp to 0xffff instead.
 *
 * @param qm   input scaling matrix, len * len entries, values must be
 *             non-zero per the H.264 scaling-list semantics
 * @param fqm  output matrix, len * len entries
 * @param len  matrix dimension (4 or 8)
 */
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int i, j;

    for (i = 0; i < len; i++) {
        for (j = 0; j < len; j++) {
            unsigned int coeff = qm[j * len + i];

            assert(coeff);

            /* Defensive: avoid division by zero in NDEBUG builds. */
            if (coeff == 0)
                coeff = 1;

            if (coeff == 1)
                fqm[i * len + j] = 0xffff;  /* 65536 does not fit; saturate */
            else
                fqm[i * len + j] = (1 << 16) / coeff;
        }
    }
}
9542
9543 static void
9544 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
9545                        struct encode_state *encode_state,
9546                        struct intel_encoder_context *encoder_context)
9547 {
9548     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9549     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9550     VAEncSequenceParameterBufferH264  *seq_param = avc_state->seq_param;
9551     VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
9552
9553     if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9554         && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9555         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
9556         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
9557         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
9558         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
9559     } else {
9560         int i;
9561         uint32_t fqm[32];
9562         VAIQMatrixBufferH264 *qm;
9563         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9564         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9565
9566         for (i = 0; i < 3; i++)
9567             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
9568         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
9569
9570         for (i = 3; i < 6; i++)
9571             gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
9572         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
9573
9574         gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
9575         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
9576
9577         gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
9578         gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
9579     }
9580 }
9581
9582 static void
9583 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9584                            struct intel_encoder_context *encoder_context,
9585                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9586                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9587                            int slice_header_indicator,
9588                            struct intel_batchbuffer *batch)
9589 {
9590     if (data_bits_in_last_dw == 0)
9591         data_bits_in_last_dw = 32;
9592
9593     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9594
9595     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9596     OUT_BCS_BATCH(batch,
9597                   (0 << 16) |   /* always start at offset 0 */
9598                   (slice_header_indicator << 14) |
9599                   (data_bits_in_last_dw << 8) |
9600                   (skip_emul_byte_count << 4) |
9601                   (!!emulation_flag << 3) |
9602                   ((!!is_last_header) << 2) |
9603                   ((!!is_end_of_slice) << 1) |
9604                   (0 << 0));    /* check this flag */
9605     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9606
9607     ADVANCE_BCS_BATCH(batch);
9608 }
9609
9610 static void
9611 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
9612                                     struct encode_state *encode_state,
9613                                     struct intel_encoder_context *encoder_context,
9614                                     struct intel_batchbuffer *batch)
9615 {
9616     VAEncPackedHeaderParameterBuffer *param = NULL;
9617     unsigned int length_in_bits;
9618     unsigned int *header_data = NULL;
9619     unsigned char *nal_type = NULL;
9620     int count, i, start_index;
9621
9622     count = encode_state->slice_rawdata_count[0];
9623     start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
9624
9625     for (i = 0; i < count; i++) {
9626         unsigned int skip_emul_byte_cnt;
9627
9628         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9629         nal_type = (unsigned char *)header_data;
9630
9631         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
9632         if (param->type != VAEncPackedHeaderRawData)
9633             continue;
9634
9635         length_in_bits = param->bit_length;
9636
9637         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9638
9639         if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
9640             gen9_mfc_avc_insert_object(ctx,
9641                                        encoder_context,
9642                                        header_data,
9643                                        ALIGN(length_in_bits, 32) >> 5,
9644                                        length_in_bits & 0x1f,
9645                                        skip_emul_byte_cnt,
9646                                        0,
9647                                        0,
9648                                        !param->has_emulation_bytes,
9649                                        0,
9650                                        batch);
9651             break;
9652         }
9653     }
9654 }
9655
/*
 * Insert the packed data belonging to one slice into the bitstream.
 *
 * First emits every application-supplied raw packed buffer for the slice
 * (skipping the slice header itself and any AUD, which is handled
 * earlier), then emits the slice header last: either the packed slice
 * header supplied by the application, or one built by the driver when
 * none was provided.
 */
static void
gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int slice_index,
                                      struct intel_batchbuffer *batch)
{
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int length_in_bits;
    unsigned int *header_data = NULL;
    int count, i, start_index;
    int slice_header_index;
    unsigned char *nal_type = NULL;

    /* index 0 is the "no packed slice header" sentinel */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
        nal_type = (unsigned char *)header_data;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* skip the slice header packed data type as it is lastly inserted;
         * also skip the AUD, which was already emitted for the first slice */
        if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
            continue;

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   0,
                                   batch);
    }

    if (slice_header_index == -1) {
        VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(seq_param,
                                                             pic_param,
                                                             slice_params,
                                                             &slice_header);
        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1,
                                   1,
                                   batch);

        /* build_avc_slice_header allocated the buffer; release it here */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        gen9_mfc_avc_insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   1,
                                   batch);
    }

    return;
}
9762
9763 static void
9764 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9765                            struct encode_state *encode_state,
9766                            struct intel_encoder_context *encoder_context,
9767                            VAEncSliceParameterBufferH264 *slice_param,
9768                            int slice_index,
9769                            struct intel_batchbuffer *batch)
9770 {
9771     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9772     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9773     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9774     unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9775     unsigned int skip_emul_byte_cnt;
9776
9777     if (slice_index == 0) {
9778
9779         /* if AUD exist and insert it firstly */
9780         gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
9781
9782         if (encode_state->packed_header_data[idx]) {
9783             VAEncPackedHeaderParameterBuffer *param = NULL;
9784             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9785             unsigned int length_in_bits;
9786
9787             assert(encode_state->packed_header_param[idx]);
9788             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9789             length_in_bits = param->bit_length;
9790
9791             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9792             gen9_mfc_avc_insert_object(ctx,
9793                                        encoder_context,
9794                                        header_data,
9795                                        ALIGN(length_in_bits, 32) >> 5,
9796                                        length_in_bits & 0x1f,
9797                                        skip_emul_byte_cnt,
9798                                        0,
9799                                        0,
9800                                        !param->has_emulation_bytes,
9801                                        0,
9802                                        batch);
9803         }
9804
9805         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9806
9807         if (encode_state->packed_header_data[idx]) {
9808             VAEncPackedHeaderParameterBuffer *param = NULL;
9809             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9810             unsigned int length_in_bits;
9811
9812             assert(encode_state->packed_header_param[idx]);
9813             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9814             length_in_bits = param->bit_length;
9815
9816             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9817
9818             gen9_mfc_avc_insert_object(ctx,
9819                                        encoder_context,
9820                                        header_data,
9821                                        ALIGN(length_in_bits, 32) >> 5,
9822                                        length_in_bits & 0x1f,
9823                                        skip_emul_byte_cnt,
9824                                        0,
9825                                        0,
9826                                        !param->has_emulation_bytes,
9827                                        0,
9828                                        batch);
9829         }
9830
9831         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9832
9833         if (encode_state->packed_header_data[idx]) {
9834             VAEncPackedHeaderParameterBuffer *param = NULL;
9835             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9836             unsigned int length_in_bits;
9837
9838             assert(encode_state->packed_header_param[idx]);
9839             param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9840             length_in_bits = param->bit_length;
9841
9842             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9843             gen9_mfc_avc_insert_object(ctx,
9844                                        encoder_context,
9845                                        header_data,
9846                                        ALIGN(length_in_bits, 32) >> 5,
9847                                        length_in_bits & 0x1f,
9848                                        skip_emul_byte_cnt,
9849                                        0,
9850                                        0,
9851                                        !param->has_emulation_bytes,
9852                                        0,
9853                                        batch);
9854         } else if (internal_rate_mode == VA_RC_CBR) {
9855             /* insert others */
9856         }
9857     }
9858
9859     gen9_mfc_avc_insert_slice_packed_data(ctx,
9860                                           encode_state,
9861                                           encoder_context,
9862                                           slice_index,
9863                                           batch);
9864 }
9865
/*
 * Program MFX_AVC_SLICE_STATE (11 DWs) for one slice.
 *
 * Derives the per-slice PAK parameters from the VA picture/slice
 * parameters: active reference counts, weighted prediction mode, log2
 * weight denominators, slice QP, slice/next-slice MB positions, and the
 * BRC-related control bits (rate-control counters, panic mode, rounding).
 *
 * @param pic_param        VA picture parameters for the current frame
 * @param slice_param      VA parameters for this slice
 * @param next_slice_param parameters of the following slice, or NULL if
 *                         this is the last slice of the frame
 */
static void
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         VAEncSliceParameterBufferH264 *next_slice_param,
                         struct intel_batchbuffer *batch)
{
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    unsigned char correct[6], grow, shrink;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int max_qp_n, max_qp_p;
    int i;
    int weighted_pred_idc = 0;
    int num_ref_l0 = 0, num_ref_l1 = 0;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    unsigned int rc_panic_enable = 0;
    unsigned int rate_control_counter_enable = 0;
    unsigned int rounding_value = 0;
    unsigned int rounding_inter_enable = 0;

    /* convert the slice's first MB address into (x, y) MB coordinates */
    slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
    slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
        next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
    } else {
        /* last slice: the "next slice" position is the end of the frame */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = generic_state->frame_height_in_mbs;
    }

    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
        rounding_inter_enable = avc_state->rounding_inter_enable;
        rounding_value = avc_state->rounding_value;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    max_qp_n = 0;
    max_qp_p = 0;
    grow = 0;
    shrink = 0;

    /* counters accumulate across PAK passes; only the first pass resets them */
    rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
    /* panic mode only on the final BRC pass, and only when min/max QP
     * clamping is not already in charge */
    rc_panic_enable = (avc_state->rc_panic_enable &&
                       (!avc_state->min_max_qp_enable) &&
                       (encoder_context->rate_control_mode != VA_RC_CQP) &&
                       (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));

    for (i = 0; i < 6; i++)
        correct[i] = 0;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_l1 << 24) |
                  (num_ref_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (slice_qp << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));

    /* DW4: first MB of this slice; DW5: first MB of the next slice */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 24 |
                  slice_hor_pos << 16 |
                  slice_param->macroblock_address);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);

    OUT_BCS_BATCH(batch,
                  (rate_control_counter_enable << 31) |
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* Loose Rate Control */
                  (0 << 24) |           /* RC Stable Tolerance */
                  (rc_panic_enable << 23) |           /* RC Panic Enable */
                  (1 << 22) |           /* CBP mode */
                  (0 << 21) |           /* MB Type Direct Conversion, 0: Enable, 1: Disable */
                  (0 << 20) |           /* MB Type Skip Conversion, 0: Enable, 1: Disable */
                  (!next_slice_param << 19) |                   /* Is Last Slice */
                  (0 << 18) |           /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag  */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (1 << 12));           /* CabacZeroWordInsertionEnable */

    /* DW7: slice data starting offset into the PAK-BSE object (see
     * MFX_IND_OBJ_BASE_ADDR_STATE) */
    OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);

    /* DW8-9: BRC QP adjustment limits and correction factors (all zero
     * here; real values are patched in by the BRC kernel when enabled) */
    OUT_BCS_BATCH(batch,
                  (max_qp_n << 24) |     /*Target QP - 24 is lowest QP*/
                  (max_qp_p << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8) |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (rounding_inter_enable << 31) |
                  (rounding_value << 28) |
                  (1 << 27) |
                  (5 << 24) |
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
10014
10015 static uint8_t
10016 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10017 {
10018     unsigned int is_long_term =
10019         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10020     unsigned int is_top_field =
10021         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10022     unsigned int is_bottom_field =
10023         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
10024
10025     return ((is_long_term                         << 6) |
10026             (0 << 5) |
10027             (frame_store_id                       << 1) |
10028             ((is_top_field ^ 1) & is_bottom_field));
10029 }
10030
10031 static void
10032 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10033                            struct encode_state *encode_state,
10034                            struct intel_encoder_context *encoder_context,
10035                            VAEncSliceParameterBufferH264 *slice_param,
10036                            struct intel_batchbuffer *batch)
10037 {
10038     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10039     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10040     VAPictureH264 *ref_pic;
10041     int i, slice_type, ref_idx_shift;
10042     unsigned int fwd_ref_entry;
10043     unsigned int bwd_ref_entry;
10044
10045     /* max 4 ref frames are allowed for l0 and l1 */
10046     fwd_ref_entry = 0x80808080;
10047     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10048
10049     if ((slice_type == SLICE_TYPE_P) ||
10050         (slice_type == SLICE_TYPE_B)) {
10051         for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10052             ref_pic = &slice_param->RefPicList0[i];
10053             ref_idx_shift = i * 8;
10054
10055             fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10056             fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
10057         }
10058     }
10059
10060     bwd_ref_entry = 0x80808080;
10061     if (slice_type == SLICE_TYPE_B) {
10062         for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10063             ref_pic = &slice_param->RefPicList1[i];
10064             ref_idx_shift = i * 8;
10065
10066             bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10067             bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
10068         }
10069     }
10070
10071     if ((slice_type == SLICE_TYPE_P) ||
10072         (slice_type == SLICE_TYPE_B)) {
10073         BEGIN_BCS_BATCH(batch, 10);
10074         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10075         OUT_BCS_BATCH(batch, 0);                        // L0
10076         OUT_BCS_BATCH(batch, fwd_ref_entry);
10077
10078         for (i = 0; i < 7; i++) {
10079             OUT_BCS_BATCH(batch, 0x80808080);
10080         }
10081
10082         ADVANCE_BCS_BATCH(batch);
10083     }
10084
10085     if (slice_type == SLICE_TYPE_B) {
10086         BEGIN_BCS_BATCH(batch, 10);
10087         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10088         OUT_BCS_BATCH(batch, 1);                  //Select L1
10089         OUT_BCS_BATCH(batch, bwd_ref_entry);      //max 4 reference allowed
10090         for (i = 0; i < 7; i++) {
10091             OUT_BCS_BATCH(batch, 0x80808080);
10092         }
10093         ADVANCE_BCS_BATCH(batch);
10094     }
10095 }
10096
10097 static void
10098 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10099                                 struct encode_state *encode_state,
10100                                 struct intel_encoder_context *encoder_context,
10101                                 VAEncPictureParameterBufferH264 *pic_param,
10102                                 VAEncSliceParameterBufferH264 *slice_param,
10103                                 struct intel_batchbuffer *batch)
10104 {
10105     int i, slice_type;
10106     short weightoffsets[32 * 6];
10107
10108     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10109
10110     if (slice_type == SLICE_TYPE_P &&
10111         pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10112         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10113         for (i = 0; i < 32; i++) {
10114             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10115             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10116             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10117             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10118             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10119             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10120         }
10121
10122         BEGIN_BCS_BATCH(batch, 98);
10123         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10124         OUT_BCS_BATCH(batch, 0);
10125         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10126
10127         ADVANCE_BCS_BATCH(batch);
10128     }
10129
10130     if (slice_type == SLICE_TYPE_B &&
10131         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10132         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10133         for (i = 0; i < 32; i++) {
10134             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10135             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10136             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10137             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10138             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10139             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10140         }
10141
10142         BEGIN_BCS_BATCH(batch, 98);
10143         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10144         OUT_BCS_BATCH(batch, 0);
10145         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10146         ADVANCE_BCS_BATCH(batch);
10147
10148         memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10149         for (i = 0; i < 32; i++) {
10150             weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10151             weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10152             weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10153             weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10154             weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10155             weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10156         }
10157
10158         BEGIN_BCS_BATCH(batch, 98);
10159         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10160         OUT_BCS_BATCH(batch, 1);
10161         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10162         ADVANCE_BCS_BATCH(batch);
10163     }
10164 }
10165
/*
 * Emit the PAK commands for a single slice.
 *
 * On the first PAK pass the slice-level commands (REF_IDX_STATE,
 * WEIGHTOFFSET_STATE, SLICE_STATE, packed headers) are built into a shared
 * second-level batch buffer and the slice's offset within it is recorded;
 * later BRC re-encode passes simply replay the recorded offset.  The main
 * batch then chains to that second-level batch, followed by a second chain
 * to the per-surface MB code buffer produced by the VME stage.
 */
static void
gen9_mfc_avc_single_slice(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          VAEncSliceParameterBufferH264 *slice_param,
                          VAEncSliceParameterBufferH264 *next_slice_param,
                          int slice_index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct object_surface *obj_surface;
    struct gen9_surface_avc *avc_priv_surface;

    unsigned int slice_offset = 0;

    if (generic_state->curr_pak_pass == 0) {
        /* First pass: record where this slice's commands start so that
         * subsequent BRC passes can replay them without rebuilding. */
        slice_offset = intel_batchbuffer_used_size(slice_batch);
        avc_state->slice_batch_offset[slice_index] = slice_offset;
        gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
        gen9_mfc_avc_weightoffset_state(ctx,
                                        encode_state,
                                        encoder_context,
                                        pic_param,
                                        slice_param,
                                        slice_batch);
        gen9_mfc_avc_slice_state(ctx,
                                 encode_state,
                                 encoder_context,
                                 pic_param,
                                 slice_param,
                                 next_slice_param,
                                 slice_batch);
        gen9_mfc_avc_inset_headers(ctx,
                                   encode_state,
                                   encoder_context,
                                   slice_param,
                                   slice_index,
                                   slice_batch);

        /* Terminate this slice's span in the second-level batch (the leading
         * 0 DW keeps the end marker QWORD-aligned). */
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

    } else {
        /* Re-encode pass: reuse the commands recorded on pass 0. */
        slice_offset = avc_state->slice_batch_offset[slice_index];
    }
    /* insert slice as second level.*/
    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    second_level_batch.offset = slice_offset;
    second_level_batch.bo = slice_batch->buffer;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* insert mb code as second level.*/
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface->private_data);
    avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;

    memset(&second_level_batch, 0, sizeof(second_level_batch));
    second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
    /* Each MB's PAK object is 16 DWs (64 bytes); start at this slice's first MB. */
    second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
    second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
    gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);

}
10240
/*
 * Emit PAK commands for all slices of the current frame.
 *
 * Walks every slice parameter buffer (and every element within each buffer),
 * passing each slice together with its successor so the last slice can be
 * flagged in SLICE_STATE.  When only one slice per frame is configured
 * (is_frame_level), a single iteration suffices and the loops break early.
 * Ends with an MI_FLUSH_DW that invalidates the video pipeline cache.
 */
static void
gen9_avc_pak_slice_level(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
    VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    int i, j;
    int slice_index = 0;
    int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1;   /* check it for SKL,now single slice per frame */
    int has_tail = 0;             /* check it later */

    for (j = 0; j < encode_state->num_slice_params_ext; j++) {
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        /* The first slice of the next parameter buffer (if any) is the
         * lookahead slice for the last element of this buffer. */
        if (j == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            if (i < encode_state->slice_params_ext[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_mfc_avc_single_slice(ctx,
                                      encode_state,
                                      encoder_context,
                                      slice_param,
                                      next_slice_param,
                                      slice_index);
            slice_param++;
            slice_index++;

            /* Single-slice-per-frame mode: one slice is all there is. */
            if (is_frame_level)
                break;
        }

        if (is_frame_level)
            break;
    }

    if (has_tail) {
        /* insert a tail if required */
    }

    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}
/*
 * Emit the picture-level MFX PAK command sequence for the current pass.
 *
 * For BRC re-encode passes, an MI_CONDITIONAL_BATCH_BUFFER_END first checks
 * the image status mask written by the previous pass so the re-encode is
 * skipped when the frame already fit.  The pipe/surface/buffer state
 * commands are then emitted in the order the MFX engine requires, followed
 * by the per-pass image state: replayed from the BRC-written image state
 * buffer when BRC is on, or generated fresh for CQP.
 */
static void
gen9_avc_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    if (generic_state->brc_enabled &&
        generic_state->curr_pak_pass) {
        /* Skip this re-encode pass if the previous pass's image status says
         * the frame already met the BRC constraints. */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
        struct encoder_status_buffer_internal *status_buffer;
        status_buffer = &(avc_ctx->status_buffer);

        memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
        mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
        mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
        mi_conditional_batch_buffer_end_params.compare_data = 0;
        mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
        gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
    }

    /* Command order below is mandated by the MFX pipeline; do not reorder. */
    gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
    gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
    gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);

    if (generic_state->brc_enabled) {
        /* Replay the image state slot the BRC kernel wrote for this pass. */
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        if (generic_state->curr_pak_pass == 0) {
            second_level_batch.offset = 0;
        } else {
            second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
        }
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    } else {
        /*generate a new image state */
        gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
        memset(&second_level_batch, 0, sizeof(second_level_batch));
        second_level_batch.offset = 0;
        second_level_batch.is_second_level = 1;
        second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
        gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
    }

    gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);

}
10357
/*
 * Capture the MFX PAK status registers after encoding a frame.
 *
 * Stores the bitstream byte count and image status mask into the status
 * buffer (consumed by vaSyncSurface/status queries), and mirrors the byte
 * counts, pass number, and per-pass image status control into the BRC
 * pre-PAK statistics buffer that the BRC update kernel reads for the next
 * pass.  Flushes surround the register reads to order them against the
 * PAK commands.
 */
static void
gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;

    struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
    struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
    struct encoder_status_buffer_internal *status_buffer;

    status_buffer = &(avc_ctx->status_buffer);

    /* Ensure prior PAK work is complete before sampling the registers. */
    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    /* read register and store into status_buffer and pak_statitistic info */
    memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    mi_store_reg_mem_param.bo = status_buffer->bo;
    mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /*update the status in the pak_statistic_surface */
    /* DW0: frame bitstream byte count */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 0;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW1: byte count excluding headers */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = 4;
    mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    /* DW2: number of PAK passes executed so far (current pass + 1) */
    memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
    mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
    mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
    gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);

    /* DW(4 + pass): image status control register for this pass */
    mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
    mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
    mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
    gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

    memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
    gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);

    return;
}
10417
10418 static void
10419 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10420                          struct intel_encoder_context *encoder_context)
10421 {
10422     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10423     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10424     unsigned int rate_control_mode = encoder_context->rate_control_mode;
10425
10426     switch (rate_control_mode & 0x7f) {
10427     case VA_RC_CBR:
10428         generic_state->internal_rate_mode = VA_RC_CBR;
10429         break;
10430
10431     case VA_RC_VBR:
10432         generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10433         break;
10434
10435     case VA_RC_CQP:
10436     default:
10437         generic_state->internal_rate_mode = VA_RC_CQP;
10438         break;
10439     }
10440
10441     if (encoder_context->quality_level == 0)
10442         encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
10443 }
10444
10445 /* allcate resources for pak only (fei mode) */
10446 static VAStatus
10447 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10448                                   VAProfile profile,
10449                                   struct encode_state *encode_state,
10450                                   struct intel_encoder_context *encoder_context)
10451 {
10452     VAStatus va_status;
10453     struct i965_driver_data *i965 = i965_driver_data(ctx);
10454     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10455     struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10456     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10457     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10458     struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10459     struct gen9_surface_avc *avc_priv_surface;
10460     VAEncPictureParameterBufferH264  *pic_param;
10461     VAEncSliceParameterBufferH264 *slice_param;
10462     VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10463     unsigned int size = 0, i, j;
10464     unsigned int frame_mb_nums;
10465     struct object_buffer *obj_buffer = NULL;
10466     struct buffer_store *buffer_store = NULL;
10467     struct object_surface *obj_surface = NULL;
10468     struct avc_surface_param surface_param;
10469     struct i965_coded_buffer_segment *coded_buffer_segment;
10470     dri_bo *bo;
10471     unsigned char * pdata;
10472
10473     gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10474
10475     pic_param = avc_state->pic_param;
10476     slice_param = avc_state->slice_param[0];
10477
10478     va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10479     if (va_status != VA_STATUS_SUCCESS)
10480         return va_status;
10481
10482     va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10483     if (va_status != VA_STATUS_SUCCESS)
10484         return va_status;
10485
10486     /* Encoded bitstream ?*/
10487     obj_buffer = encode_state->coded_buf_object;
10488     bo = obj_buffer->buffer_store->bo;
10489     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10490     i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10491     generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10492     generic_ctx->compressed_bitstream.end_offset =
10493         ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
10494
10495     /*status buffer */
10496     dri_bo_unreference(avc_ctx->status_buffer.bo);
10497     avc_ctx->status_buffer.bo = bo;
10498     dri_bo_reference(bo);
10499
10500     /* set the internal flag to 0 to indicate the coded size is unknown */
10501     dri_bo_map(bo, 1);
10502     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10503     coded_buffer_segment->mapped = 0;
10504     coded_buffer_segment->codec = encoder_context->codec;
10505     coded_buffer_segment->status_support = 1;
10506
10507     pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10508     memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10509     dri_bo_unmap(bo);
10510     //frame id, it is the ref pic id in the reference_objects list.
10511     avc_state->num_refs[0] = 0;
10512     avc_state->num_refs[1] = 0;
10513     if (generic_state->frame_type == SLICE_TYPE_P) {
10514         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10515
10516         if (slice_param->num_ref_idx_active_override_flag)
10517             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10518     } else if (generic_state->frame_type == SLICE_TYPE_B) {
10519         avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10520         avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10521
10522         if (slice_param->num_ref_idx_active_override_flag) {
10523             avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10524             avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
10525         }
10526     }
10527     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10528         VAPictureH264 *va_pic;
10529
10530         assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10531         avc_state->list_ref_idx[0][i] = 0;
10532
10533         if (i >= avc_state->num_refs[0])
10534             continue;
10535
10536         va_pic = &slice_param->RefPicList0[i];
10537
10538         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10539             obj_surface = encode_state->reference_objects[j];
10540
10541             if (obj_surface &&
10542                 obj_surface->bo &&
10543                 obj_surface->base.id == va_pic->picture_id) {
10544
10545                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10546                 avc_state->list_ref_idx[0][i] = j;
10547
10548                 break;
10549             }
10550         }
10551     }
10552     for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10553         VAPictureH264 *va_pic;
10554
10555         assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10556         avc_state->list_ref_idx[1][i] = 0;
10557
10558         if (i >= avc_state->num_refs[1])
10559             continue;
10560
10561         va_pic = &slice_param->RefPicList1[i];
10562
10563         for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10564             obj_surface = encode_state->reference_objects[j];
10565
10566
10567             if (obj_surface &&
10568                 obj_surface->bo &&
10569                 obj_surface->base.id == va_pic->picture_id) {
10570
10571                 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10572                 avc_state->list_ref_idx[1][i] = j;
10573
10574                 break;
10575                 break;
10576             }
10577         }
10578     }
10579
10580     obj_surface = encode_state->reconstructed_object;
10581     fei_param = avc_state->fei_framectl_param;
10582     frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10583
10584     /* Setup current reconstruct frame */
10585     obj_surface = encode_state->reconstructed_object;
10586     va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10587
10588     if (va_status != VA_STATUS_SUCCESS)
10589         return va_status;
10590
10591     memset(&surface_param, 0, sizeof(surface_param));
10592     surface_param.frame_width = generic_state->frame_width_in_pixel;
10593     surface_param.frame_height = generic_state->frame_height_in_pixel;
10594     va_status = gen9_avc_init_check_surfaces(ctx,
10595                                              obj_surface, encoder_context,
10596                                              &surface_param);
10597     avc_priv_surface = obj_surface->private_data;
10598
10599     /* res_mb_code_surface for MB code */
10600     /* PAK only mode must have the mb_code_surface from middleware,
10601      * so the code shouldn't reach here without an externally provided
10602      * MB Code buffer */
10603     assert(fei_param->mb_code_data != VA_INVALID_ID);
10604     size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10605     obj_buffer = BUFFER(fei_param->mb_code_data);
10606     assert(obj_buffer != NULL);
10607     buffer_store = obj_buffer->buffer_store;
10608     assert(size <= buffer_store->bo->size);
10609     if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10610         i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10611     i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10612                                            buffer_store->bo);
10613     /* res_mv_data_surface for MV data */
10614     size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10615     if (fei_param->mv_data != VA_INVALID_ID) {
10616         obj_buffer = BUFFER(fei_param->mv_data);
10617         assert(obj_buffer != NULL);
10618         buffer_store = obj_buffer->buffer_store;
10619         assert(size <= buffer_store->bo->size);
10620         if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10621             i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10622         i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10623                                                buffer_store->bo);
10624     }
10625
10626     return VA_STATUS_SUCCESS;
10627
10628 }
10629
static VAStatus
gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    /* Prepare every surface and scratch buffer the MFX/PAK hardware pass
     * needs for the current frame: the reconstructed output surface, the
     * raw input surface, the reference list with their direct-MV buffers,
     * the second-level slice batch buffer, and the per-row/per-MB scratch
     * buffers.  Returns VA_STATUS_SUCCESS or an allocation/validation
     * error code. */
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;

    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264  *pic_param;
    VAEncSliceParameterBufferH264 *slice_param;

    struct gen9_surface_avc *avc_priv_surface;
    struct avc_surface_param surface_param;
    int i, j, enable_avc_ildb = 0;
    unsigned int allocate_flag = 1;
    unsigned int size, w_mb, h_mb;

    /* In FEI PAK-only mode the MB code / MV buffers come from the
     * middleware; hook them up first. */
    if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
        va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
        if (va_status != VA_STATUS_SUCCESS)
            return va_status;
    }

    pic_param = avc_state->pic_param;
    slice_param = avc_state->slice_param[0];
    w_mb = generic_state->frame_width_in_mbs;
    h_mb = generic_state->frame_height_in_mbs;

    /* Scan all slice parameters: the in-loop deblocking filter (ILDB) is
     * enabled for the frame as soon as any slice does not fully disable
     * it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }
    avc_state->enable_avc_ildb = enable_avc_ildb;

    /* setup the all surface and buffer for PAK */
    /* Setup current reconstruct frame */
    obj_surface = encode_state->reconstructed_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.frame_width = generic_state->frame_width_in_pixel;
    surface_param.frame_height = generic_state->frame_height_in_pixel;
    va_status = gen9_avc_init_check_surfaces(ctx,
                                             obj_surface, encoder_context,
                                             &surface_param);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    /* Initialize the private per-surface data of the reconstructed frame:
     * its QP, frame_store_id, POC bookkeeping, and the two direct-MV
     * buffers (top/bottom) which occupy the last two DMV slots. */
    {
        avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
        i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
        avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        avc_priv_surface->frame_store_id = 0;
        avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
        avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
        avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
        avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
    }
    i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
    i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
    i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);


    /* The hardware writes the reconstructed pixels either after or before
     * deblocking depending on whether ILDB is enabled for this frame. */
    if (avc_state->enable_avc_ildb) {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    } else {
        i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
    }
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;
    i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
    i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);

    /* Reference surfaces: rebind each reference and its pair of
     * direct-MV buffers (top at 2*i, bottom at 2*i+1); the loop stops at
     * the first empty reference slot. */
    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
        i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
        i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
        obj_surface = encode_state->reference_objects[i];
        avc_state->top_field_poc[2 * i] = 0;
        avc_state->top_field_poc[2 * i + 1] = 0;

        if (obj_surface && obj_surface->bo) {
            i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);

            /* actually it should be handled when it is reconstructed surface */
            va_status = gen9_avc_init_check_surfaces(ctx,
                                                     obj_surface, encoder_context,
                                                     &surface_param);
            if (va_status != VA_STATUS_SUCCESS)
                return va_status;
            avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
            i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
            avc_priv_surface->frame_store_id = i;
            avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
            avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
        } else {
            break;
        }
    }

    /* Recreate the second-level batch buffer sized for the number of
     * slices in this frame (4KB per slice). */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    avc_ctx->pres_slice_batch_buffer_2nd_level =
        intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                              4096 *
                              encode_state->num_slice_params_ext);
    if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
        avc_state->slice_batch_offset[i] = 0;
    }

    /* Row-store and status scratch buffers; sizes scale with the frame
     * width/height in macroblocks. */
    size = w_mb * 64;
    i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_intra_row_store_scratch_buffer,
                                               size,
                                               "PAK Intra row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 4 * 64;
    i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
                                               size,
                                               "PAK Deblocking filter row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * 2 * 64;
    i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
                                               size,
                                               "PAK BSD/MPC row store scratch buffer");
    if (!allocate_flag)
        goto failed_allocation;

    size = w_mb * h_mb * 16;
    i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &avc_ctx->res_pak_mb_status_buffer,
                                               size,
                                               "PAK MB status buffer");
    if (!allocate_flag)
        goto failed_allocation;

    return VA_STATUS_SUCCESS;

failed_allocation:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
10827
static VAStatus
gen9_avc_encode_picture(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    /* Run the PAK (bitstream packing) stage for one frame: prepare all
     * surfaces/buffers, then emit picture- and slice-level MFX commands
     * for each BRC PAK pass and read back the MFC status registers.
     * Returns VA_STATUS_SUCCESS or the error from the prepare step. */
    VAStatus va_status;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
    struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* Pin the work to BSD ring 0 when a second BSD ring exists. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* Multi-pass PAK: BRC may re-encode the frame up to num_pak_passes
     * times with adjusted parameters. */
    for (generic_state->curr_pak_pass = 0;
         generic_state->curr_pak_pass < generic_state->num_pak_passes;
         generic_state->curr_pak_pass++) {

        if (generic_state->curr_pak_pass == 0) {
            /* Initialize the avc Image Ctrl reg for the first pass,write 0 to staturs/control register, is it needed in AVC? */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct encoder_status_buffer_internal *status_buffer;

            status_buffer = &(avc_ctx->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
        gen9_avc_read_mfc_status(ctx, encoder_context);
    }

    /* The second-level slice batch buffer is per-frame; release it now. */
    if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
        intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
        avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    /* Frame accounting for BRC / GOP tracking. */
    generic_state->seq_frame_number++;
    generic_state->total_frame_number++;
    generic_state->first_frame = 0;
    return VA_STATUS_SUCCESS;
}
10885
10886 static VAStatus
10887 gen9_avc_pak_pipeline(VADriverContextP ctx,
10888                       VAProfile profile,
10889                       struct encode_state *encode_state,
10890                       struct intel_encoder_context *encoder_context)
10891 {
10892     VAStatus vaStatus;
10893
10894     switch (profile) {
10895     case VAProfileH264ConstrainedBaseline:
10896     case VAProfileH264Main:
10897     case VAProfileH264High:
10898     case VAProfileH264MultiviewHigh:
10899     case VAProfileH264StereoHigh:
10900         vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10901         break;
10902
10903     default:
10904         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
10905         break;
10906     }
10907
10908     return vaStatus;
10909 }
10910
10911 static void
10912 gen9_avc_pak_context_destroy(void * context)
10913 {
10914     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10915     struct generic_encoder_context * generic_ctx;
10916     struct i965_avc_encoder_context * avc_ctx;
10917     int i = 0;
10918
10919     if (!pak_context)
10920         return;
10921
10922     generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10923     avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10924
10925     // other things
10926     i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10927     i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10928     i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10929     i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10930
10931     i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10932     i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10933     i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10934     i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10935     i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10936
10937     for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10938         i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10939     }
10940
10941     for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10942         i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
10943     }
10944
10945     if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10946         intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10947         avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10948     }
10949
10950 }
10951
10952 static VAStatus
10953 gen9_avc_get_coded_status(VADriverContextP ctx,
10954                           struct intel_encoder_context *encoder_context,
10955                           struct i965_coded_buffer_segment *coded_buf_seg)
10956 {
10957     struct encoder_status *avc_encode_status;
10958
10959     if (!encoder_context || !coded_buf_seg)
10960         return VA_STATUS_ERROR_INVALID_BUFFER;
10961
10962     avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10963     coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10964
10965     return VA_STATUS_SUCCESS;
10966 }
10967
10968 Bool
10969 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10970 {
10971     /* VME & PAK share the same context */
10972     struct i965_driver_data *i965 = i965_driver_data(ctx);
10973     struct encoder_vme_mfc_context * vme_context = NULL;
10974     struct generic_encoder_context * generic_ctx = NULL;
10975     struct i965_avc_encoder_context * avc_ctx = NULL;
10976     struct generic_enc_codec_state * generic_state = NULL;
10977     struct avc_enc_state * avc_state = NULL;
10978     struct encoder_status_buffer_internal *status_buffer;
10979     uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
10980
10981     vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10982     generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10983     avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10984     generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10985     avc_state = calloc(1, sizeof(struct avc_enc_state));
10986
10987     if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10988         goto allocate_structure_failed;
10989
10990     memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10991     memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10992     memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10993     memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10994     memset(avc_state, 0, sizeof(struct avc_enc_state));
10995
10996     encoder_context->vme_context = vme_context;
10997     vme_context->generic_enc_ctx = generic_ctx;
10998     vme_context->private_enc_ctx = avc_ctx;
10999     vme_context->generic_enc_state = generic_state;
11000     vme_context->private_enc_state = avc_state;
11001
11002     if (IS_SKL(i965->intel.device_info) ||
11003         IS_BXT(i965->intel.device_info)) {
11004         if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11005             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11006             generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11007         } else {
11008             /* FEI and PreEnc operation kernels are included in
11009             * the monolithic kernel binary */
11010             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11011             generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11012         }
11013     } else if (IS_GEN8(i965->intel.device_info)) {
11014         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11015         generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11016     } else if (IS_KBL(i965->intel.device_info) ||
11017                IS_GLK(i965->intel.device_info)) {
11018         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11019         generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11020     } else if (IS_GEN10(i965->intel.device_info)) {
11021         generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11022         generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
11023     } else
11024         goto allocate_structure_failed;
11025
11026     /* initialize misc ? */
11027     avc_ctx->ctx = ctx;
11028     generic_ctx->use_hw_scoreboard = 1;
11029     generic_ctx->use_hw_non_stalling_scoreboard = 1;
11030
11031     /* initialize generic state */
11032
11033     generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11034     generic_state->preset = INTEL_PRESET_RT_SPEED;
11035     generic_state->seq_frame_number = 0;
11036     generic_state->total_frame_number = 0;
11037     generic_state->frame_type = 0;
11038     generic_state->first_frame = 1;
11039
11040     generic_state->frame_width_in_pixel = 0;
11041     generic_state->frame_height_in_pixel = 0;
11042     generic_state->frame_width_in_mbs = 0;
11043     generic_state->frame_height_in_mbs = 0;
11044     generic_state->frame_width_4x = 0;
11045     generic_state->frame_height_4x = 0;
11046     generic_state->frame_width_16x = 0;
11047     generic_state->frame_height_16x = 0;
11048     generic_state->frame_width_32x = 0;
11049     generic_state->downscaled_width_4x_in_mb = 0;
11050     generic_state->downscaled_height_4x_in_mb = 0;
11051     generic_state->downscaled_width_16x_in_mb = 0;
11052     generic_state->downscaled_height_16x_in_mb = 0;
11053     generic_state->downscaled_width_32x_in_mb = 0;
11054     generic_state->downscaled_height_32x_in_mb = 0;
11055
11056     generic_state->hme_supported = 1;
11057     generic_state->b16xme_supported = 1;
11058     generic_state->b32xme_supported = 0;
11059     generic_state->hme_enabled = 0;
11060     generic_state->b16xme_enabled = 0;
11061     generic_state->b32xme_enabled = 0;
11062
11063     if (encoder_context->fei_enabled) {
11064         /* Disabling HME in FEI encode */
11065         generic_state->hme_supported = 0;
11066         generic_state->b16xme_supported = 0;
11067     } else if (encoder_context->preenc_enabled) {
11068         /* Disabling 16x16ME in PreEnc */
11069         generic_state->b16xme_supported = 0;
11070     }
11071
11072     generic_state->brc_distortion_buffer_supported = 1;
11073     generic_state->brc_constant_buffer_supported = 0;
11074
11075     generic_state->frame_rate = 30;
11076     generic_state->brc_allocated = 0;
11077     generic_state->brc_inited = 0;
11078     generic_state->brc_need_reset = 0;
11079     generic_state->is_low_delay = 0;
11080     generic_state->brc_enabled = 0;//default
11081     generic_state->internal_rate_mode = 0;
11082     generic_state->curr_pak_pass = 0;
11083     generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11084     generic_state->is_first_pass = 1;
11085     generic_state->is_last_pass = 0;
11086     generic_state->mb_brc_enabled = 0; // enable mb brc
11087     generic_state->brc_roi_enable = 0;
11088     generic_state->brc_dirty_roi_enable = 0;
11089     generic_state->skip_frame_enbale = 0;
11090
11091     generic_state->target_bit_rate = 0;
11092     generic_state->max_bit_rate = 0;
11093     generic_state->min_bit_rate = 0;
11094     generic_state->init_vbv_buffer_fullness_in_bit = 0;
11095     generic_state->vbv_buffer_size_in_bit = 0;
11096     generic_state->frames_per_100s = 0;
11097     generic_state->gop_size = 0;
11098     generic_state->gop_ref_distance = 0;
11099     generic_state->brc_target_size = 0;
11100     generic_state->brc_mode = 0;
11101     generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11102     generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11103     generic_state->brc_init_reset_buf_size_in_bits = 0;
11104     generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11105     generic_state->frames_per_window_size = 0;//default
11106     generic_state->target_percentage = 0;
11107
11108     generic_state->avbr_curracy = 0;
11109     generic_state->avbr_convergence = 0;
11110
11111     generic_state->num_skip_frames = 0;
11112     generic_state->size_skip_frames = 0;
11113
11114     generic_state->num_roi = 0;
11115     generic_state->max_delta_qp = 0;
11116     generic_state->min_delta_qp = 0;
11117
11118     if (encoder_context->rate_control_mode != VA_RC_NONE &&
11119         encoder_context->rate_control_mode != VA_RC_CQP) {
11120         generic_state->brc_enabled = 1;
11121         generic_state->brc_distortion_buffer_supported = 1;
11122         generic_state->brc_constant_buffer_supported = 1;
11123         generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11124     }
11125     /*avc state initialization */
11126     avc_state->mad_enable = 0;
11127     avc_state->mb_disable_skip_map_enable = 0;
11128     avc_state->sfd_enable = 1;//default
11129     avc_state->sfd_mb_enable = 1;//set it true
11130     avc_state->adaptive_search_window_enable = 1;//default
11131     avc_state->mb_qp_data_enable = 0;
11132     avc_state->intra_refresh_i_enable = 0;
11133     avc_state->min_max_qp_enable = 0;
11134     avc_state->skip_bias_adjustment_enable = 0;//default,same as   skip_bias_adjustment_supporte? no
11135
11136     //external input
11137     avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11138     avc_state->ftq_skip_threshold_lut_input_enable = 0;
11139     avc_state->ftq_override = 0;
11140
11141     avc_state->direct_bias_adjustment_enable = 0;
11142     avc_state->global_motion_bias_adjustment_enable = 0;
11143     avc_state->disable_sub_mb_partion = 0;
11144     avc_state->arbitrary_num_mbs_in_slice = 0;
11145     avc_state->adaptive_transform_decision_enable = 0;//default
11146     avc_state->skip_check_disable = 0;
11147     avc_state->tq_enable = 0;
11148     avc_state->enable_avc_ildb = 0;
11149     avc_state->mbaff_flag = 0;
11150     avc_state->enable_force_skip = 1;//default
11151     avc_state->rc_panic_enable = 1;//default
11152     avc_state->suppress_recon_enable = 1;//default
11153
11154     avc_state->ref_pic_select_list_supported = 1;
11155     avc_state->mb_brc_supported = 1;//?,default
11156     avc_state->multi_pre_enable = 1;//default
11157     avc_state->ftq_enable = 1;//default
11158     avc_state->caf_supported = 1; //default
11159     avc_state->caf_enable = 0;
11160     avc_state->caf_disable_hd = 1;//default
11161     avc_state->skip_bias_adjustment_supported = 1;//default
11162
11163     avc_state->adaptive_intra_scaling_enable = 1;//default
11164     avc_state->old_mode_cost_enable = 0;//default
11165     avc_state->multi_ref_qp_enable = 1;//default
11166     avc_state->weighted_ref_l0_enable = 1;//default
11167     avc_state->weighted_ref_l1_enable = 1;//default
11168     avc_state->weighted_prediction_supported = 0;
11169     avc_state->brc_split_enable = 0;
11170     avc_state->slice_level_report_supported = 0;
11171
11172     avc_state->fbr_bypass_enable = 1;//default
11173     avc_state->field_scaling_output_interleaved = 0;
11174     avc_state->mb_variance_output_enable = 0;
11175     avc_state->mb_pixel_average_output_enable = 0;
11176     avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11177     avc_state->mbenc_curbe_set_in_brc_update = 0;
11178     avc_state->rounding_inter_enable = 1; //default
11179     avc_state->adaptive_rounding_inter_enable = 1;//default
11180
11181     avc_state->mbenc_i_frame_dist_in_use = 0;
11182     avc_state->mb_status_supported = 1; //set in intialization for gen9
11183     avc_state->mb_status_enable = 0;
11184     avc_state->mb_vproc_stats_enable = 0;
11185     avc_state->flatness_check_enable = 0;
11186     avc_state->flatness_check_supported = 1;//default
11187     avc_state->block_based_skip_enable = 0;
11188     avc_state->use_widi_mbenc_kernel = 0;
11189     avc_state->kernel_trellis_enable = 0;
11190     avc_state->generic_reserved = 0;
11191
11192     avc_state->rounding_value = 0;
11193     avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11194     avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11195     avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11196     avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11197     avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11198     avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11199     avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11200     avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11201     avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11202
11203     memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11204     memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11205     memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11206
11207     avc_state->intra_refresh_qp_threshold = 0;
11208     avc_state->trellis_flag = 0;
11209     avc_state->hme_mv_cost_scaling_factor = 0;
11210     avc_state->slice_height = 1;
11211     avc_state->slice_num = 1;
11212     memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11213     avc_state->bi_weight = 0;
11214
11215     avc_state->lambda_table_enable = 0;
11216
11217     if (IS_GEN8(i965->intel.device_info)) {
11218         avc_state->brc_const_data_surface_width = 64;
11219         avc_state->brc_const_data_surface_height = 44;
11220         avc_state->mb_status_supported = 0;
11221     } else if (IS_SKL(i965->intel.device_info) ||
11222                IS_BXT(i965->intel.device_info)) {
11223         avc_state->brc_const_data_surface_width = 64;
11224         avc_state->brc_const_data_surface_height = 44;
11225         avc_state->brc_split_enable = 1;
11226     } else if (IS_KBL(i965->intel.device_info) ||
11227                IS_GEN10(i965->intel.device_info) ||
11228                IS_GLK(i965->intel.device_info)) {
11229         avc_state->brc_const_data_surface_width = 64;
11230         avc_state->brc_const_data_surface_height = 53;
11231         //gen95
11232         avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11233         avc_state->extended_mv_cost_range_enable = 0;
11234         avc_state->reserved_g95 = 0;
11235         avc_state->mbenc_brc_buffer_size = 128;
11236         avc_state->kernel_trellis_enable = 1;
11237         avc_state->lambda_table_enable = 1;
11238         avc_state->brc_split_enable = 1;
11239
11240         if (IS_GEN10(i965->intel.device_info))
11241             avc_state->adaptive_transform_decision_enable = 1;// CNL
11242     }
11243
11244     avc_state->num_refs[0] = 0;
11245     avc_state->num_refs[1] = 0;
11246     memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11247     memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11248     avc_state->tq_rounding = 0;
11249     avc_state->zero_mv_threshold = 0;
11250     avc_state->slice_second_levle_batch_buffer_in_use = 0;
11251
11252     //1. seq/pic/slice
11253
11254     /* the definition of status buffer offset for Encoder */
11255
11256     status_buffer = &avc_ctx->status_buffer;
11257     memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
11258
11259     status_buffer->base_offset = base_offset;
11260     status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11261     status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11262     status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11263     status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11264     status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11265     status_buffer->media_index_offset       = base_offset + offsetof(struct encoder_status, media_index);
11266
11267     status_buffer->status_buffer_size = sizeof(struct encoder_status);
11268     status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11269     status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11270     status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11271     status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11272     status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
11273
11274     if (IS_GEN8(i965->intel.device_info)) {
11275         gen8_avc_kernel_init(ctx, encoder_context);
11276     } else {
11277         gen9_avc_kernel_init(ctx, encoder_context);
11278     }
11279     encoder_context->vme_context = vme_context;
11280     /* Handling PreEnc operations separately since it gives better
11281      * code readability, avoid possible vme operations mess-up */
11282     encoder_context->vme_pipeline =
11283         !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11284     encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
11285
11286     return true;
11287
11288 allocate_structure_failed:
11289
11290     free(vme_context);
11291     free(generic_ctx);
11292     free(avc_ctx);
11293     free(generic_state);
11294     free(avc_state);
11295     return false;
11296 }
11297
11298 Bool
11299 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11300 {
11301     /* VME & PAK share the same context */
11302     struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11303
11304     if (!pak_context)
11305         return false;
11306
11307     encoder_context->mfc_context = pak_context;
11308     encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11309     encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11310     encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11311     encoder_context->get_status = gen9_avc_get_coded_status;
11312     return true;
11313 }