OSDN Git Service

vp9enc: limit min_qp of brc to be non-zero
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
/* Compile-time constants used by the gen9 VP9 encoder. */
49 #define MAX_VP9_ENCODER_SURFACES        64
50
51 #define MAX_URB_SIZE                    4096 /* In register */
52 #define NUM_KERNELS_PER_GPE_CONTEXT     1
53
/* NOTE(review): presumably the factor between kbps and bps; confirm at the use sites. */
54 #define VP9_BRC_KBPS                    1000
55
/*
 * BRC kernel mode identifiers. Each value is a distinct single bit.
 * NOTE(review): the exact meaning of each mode is defined by the BRC kernels,
 * which are not visible here.
 */
56 #define BRC_KERNEL_CBR                  0x0010
57 #define BRC_KERNEL_VBR                  0x0020
58 #define BRC_KERNEL_AVBR                 0x0040
59 #define BRC_KERNEL_CQL                  0x0080
60
/* Base size of one pic-state buffer; the BRC pic-state resources are allocated as 4x this value. */
61 #define VP9_PIC_STATE_BUFFER_SIZE 192
62
/*
 * One 32-bit entry of the kernel header table: 6 reserved low bits followed
 * by a 26-bit kernel start pointer. The lookup code shifts
 * kernel_start_pointer left by 6 to obtain the kernel's offset from the
 * start of the kernel binary.
 */
63 typedef struct _intel_kernel_header_ {
64     uint32_t       reserved                        : 6;
65     uint32_t       kernel_start_pointer            : 26;
66 } intel_kernel_header;
67
/*
 * Header table found at the start of the VP9 kernel binary.
 *
 * The member order is significant: the lookup code takes the address of the
 * first entry of an operation's group and steps forward by a kernel index,
 * and it treats the slot just past VP9BRC_Update as the end of the table.
 * VP9BRC_Update must therefore stay the last member.
 */
68 typedef struct _intel_vp9_kernel_header {
69     int nKernelCount;
70     intel_kernel_header PLY_DSCALE;
71     intel_kernel_header VP9_ME_P;
72     intel_kernel_header VP9_Enc_I_32x32;
73     intel_kernel_header VP9_Enc_I_16x16;
74     intel_kernel_header VP9_Enc_P;
75     intel_kernel_header VP9_Enc_TX;
76     intel_kernel_header VP9_DYS;
77
78     intel_kernel_header VP9BRC_Intra_Distortion;
79     intel_kernel_header VP9BRC_Init;
80     intel_kernel_header VP9BRC_Reset;
81     intel_kernel_header VP9BRC_Update;
82 } intel_vp9_kernel_header;
83
/*
 * Single-bit flags, one per scaling level (1x, 4x, 16x), so they can be OR-ed
 * together.
 * NOTE(review): presumably they select which dys surfaces are processed;
 * the users are outside this view - confirm.
 */
84 #define DYS_1X_FLAG    0x01
85 #define DYS_4X_FLAG    0x02
86 #define DYS_16X_FLAG   0x04
87
/*
 * Frame dimensions handed to the surface check helpers, which compare them
 * with the dimensions cached in the surface's private data and size the
 * down-scaled surfaces from them.
 */
88 struct vp9_surface_param {
89     uint32_t frame_width;
90     uint32_t frame_height;
91 };
92
/*
 * Encode a signed integer in sign-magnitude form.
 *
 * val:          value to encode.
 * sign_bit_pos: total width of the encoded field in bits. The sign is stored
 *               in bit (sign_bit_pos - 1) and the magnitude in the bits below
 *               it. Must be in the range 1..32.
 *
 * Returns the encoded field. A magnitude that does not fit is truncated to
 * its low (sign_bit_pos - 1) bits.
 *
 * All arithmetic is done in uint32_t so that val == INT_MIN and
 * sign_bit_pos == 32 do not trigger signed-overflow undefined behaviour.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    const uint32_t sign_mask = (uint32_t)1 << (sign_bit_pos - 1);
    const uint32_t mag_mask = sign_mask - 1;

    if (val < 0) {
        /* Unsigned negation is well defined even for INT_MIN. */
        uint32_t magnitude = 0u - (uint32_t)val;

        return sign_mask | (magnitude & mag_mask);
    }

    return (uint32_t)val & mag_mask;
}
104
105 static bool
106 intel_vp9_get_kernel_header_and_size(
107     void                             *pvbinary,
108     int                              binary_size,
109     INTEL_VP9_ENC_OPERATION          operation,
110     int                              krnstate_idx,
111     struct i965_kernel               *ret_kernel)
112 {
113     typedef uint32_t BIN_PTR[4];
114
115     char *bin_start;
116     intel_vp9_kernel_header      *pkh_table;
117     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
118     int next_krnoffset;
119
120     if (!pvbinary || !ret_kernel)
121         return false;
122
123     bin_start = (char *)pvbinary;
124     pkh_table = (intel_vp9_kernel_header *)pvbinary;
125     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
126     next_krnoffset = binary_size;
127
128     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
129         pcurr_header = &pkh_table->PLY_DSCALE;
130     } else if (operation == INTEL_VP9_ENC_ME) {
131         pcurr_header = &pkh_table->VP9_ME_P;
132     } else if (operation == INTEL_VP9_ENC_MBENC) {
133         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
134     } else if (operation == INTEL_VP9_ENC_DYS) {
135         pcurr_header = &pkh_table->VP9_DYS;
136     } else if (operation == INTEL_VP9_ENC_BRC) {
137         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
138     } else {
139         return false;
140     }
141
142     pcurr_header += krnstate_idx;
143     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
144
145     pnext_header = (pcurr_header + 1);
146     if (pnext_header < pinvalid_entry) {
147         next_krnoffset = pnext_header->kernel_start_pointer << 6;
148     }
149     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
150
151     return true;
152 }
153
154
155 static void
156 gen9_free_surfaces_vp9(void **data)
157 {
158     struct gen9_surface_vp9 *vp9_surface;
159
160     if (!data || !*data)
161         return;
162
163     vp9_surface = *data;
164
165     if (vp9_surface->scaled_4x_surface_obj) {
166         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
167         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
168         vp9_surface->scaled_4x_surface_obj = NULL;
169     }
170
171     if (vp9_surface->scaled_16x_surface_obj) {
172         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
173         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
174         vp9_surface->scaled_16x_surface_obj = NULL;
175     }
176
177     if (vp9_surface->dys_4x_surface_obj) {
178         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
179         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
180         vp9_surface->dys_4x_surface_obj = NULL;
181     }
182
183     if (vp9_surface->dys_16x_surface_obj) {
184         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
185         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
186         vp9_surface->dys_16x_surface_obj = NULL;
187     }
188
189     if (vp9_surface->dys_surface_obj) {
190         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
191         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
192         vp9_surface->dys_surface_obj = NULL;
193     }
194
195     free(vp9_surface);
196
197     *data = NULL;
198
199     return;
200 }
201
202 static VAStatus
203 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
204                              struct object_surface *obj_surface,
205                              struct vp9_surface_param *surface_param)
206 {
207     struct i965_driver_data *i965 = i965_driver_data(ctx);
208     struct gen9_surface_vp9 *vp9_surface;
209     int downscaled_width_4x, downscaled_height_4x;
210     int downscaled_width_16x, downscaled_height_16x;
211
212     if (!obj_surface || !obj_surface->bo)
213         return VA_STATUS_ERROR_INVALID_SURFACE;
214
215     if (obj_surface->private_data &&
216         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
217         obj_surface->free_private_data(&obj_surface->private_data);
218         obj_surface->private_data = NULL;
219     }
220
221     if (obj_surface->private_data) {
222         /* if the frame width/height is already the same as the expected,
223          * it is unncessary to reallocate it.
224          */
225         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
226         if (vp9_surface->frame_width >= surface_param->frame_width ||
227             vp9_surface->frame_height >= surface_param->frame_height)
228             return VA_STATUS_SUCCESS;
229
230         obj_surface->free_private_data(&obj_surface->private_data);
231         obj_surface->private_data = NULL;
232         vp9_surface = NULL;
233     }
234
235     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
236
237     if (!vp9_surface)
238         return VA_STATUS_ERROR_ALLOCATION_FAILED;
239
240     vp9_surface->ctx = ctx;
241     obj_surface->private_data = vp9_surface;
242     obj_surface->free_private_data = gen9_free_surfaces_vp9;
243
244     vp9_surface->frame_width = surface_param->frame_width;
245     vp9_surface->frame_height = surface_param->frame_height;
246
247     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
248     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
249
250     i965_CreateSurfaces(ctx,
251                         downscaled_width_4x,
252                         downscaled_height_4x,
253                         VA_RT_FORMAT_YUV420,
254                         1,
255                         &vp9_surface->scaled_4x_surface_id);
256
257     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
258
259     if (!vp9_surface->scaled_4x_surface_obj) {
260         return VA_STATUS_ERROR_ALLOCATION_FAILED;
261     }
262
263     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
264                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
265
266     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
267     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
268     i965_CreateSurfaces(ctx,
269                         downscaled_width_16x,
270                         downscaled_height_16x,
271                         VA_RT_FORMAT_YUV420,
272                         1,
273                         &vp9_surface->scaled_16x_surface_id);
274     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
275
276     if (!vp9_surface->scaled_16x_surface_obj) {
277         return VA_STATUS_ERROR_ALLOCATION_FAILED;
278     }
279
280     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
281                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
282
283     return VA_STATUS_SUCCESS;
284 }
285
286 static VAStatus
287 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
288                             struct object_surface *obj_surface,
289                             struct vp9_surface_param *surface_param)
290 {
291     struct i965_driver_data *i965 = i965_driver_data(ctx);
292     struct gen9_surface_vp9 *vp9_surface;
293     int dys_width_4x, dys_height_4x;
294     int dys_width_16x, dys_height_16x;
295
296     /* As this is handled after the surface checking, it is unnecessary
297      * to check the surface bo and vp9_priv_surface again
298      */
299
300     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
301
302     if (!vp9_surface)
303         return VA_STATUS_ERROR_INVALID_SURFACE;
304
305     /* if the frame_width/height of dys_surface is the same as
306      * the expected, it is unnecessary to allocate it again
307      */
308     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
309         vp9_surface->dys_frame_width == surface_param->frame_width)
310         return VA_STATUS_SUCCESS;
311
312     if (vp9_surface->dys_4x_surface_obj) {
313         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
314         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
315         vp9_surface->dys_4x_surface_obj = NULL;
316     }
317
318     if (vp9_surface->dys_16x_surface_obj) {
319         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
320         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
321         vp9_surface->dys_16x_surface_obj = NULL;
322     }
323
324     if (vp9_surface->dys_surface_obj) {
325         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
326         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
327         vp9_surface->dys_surface_obj = NULL;
328     }
329
330     vp9_surface->dys_frame_width = surface_param->frame_width;
331     vp9_surface->dys_frame_height = surface_param->frame_height;
332
333     i965_CreateSurfaces(ctx,
334                         surface_param->frame_width,
335                         surface_param->frame_height,
336                         VA_RT_FORMAT_YUV420,
337                         1,
338                         &vp9_surface->dys_surface_id);
339     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
340
341     if (!vp9_surface->dys_surface_obj) {
342         return VA_STATUS_ERROR_ALLOCATION_FAILED;
343     }
344
345     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
346                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
347
348     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
349     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
350
351     i965_CreateSurfaces(ctx,
352                         dys_width_4x,
353                         dys_height_4x,
354                         VA_RT_FORMAT_YUV420,
355                         1,
356                         &vp9_surface->dys_4x_surface_id);
357
358     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
359
360     if (!vp9_surface->dys_4x_surface_obj) {
361         return VA_STATUS_ERROR_ALLOCATION_FAILED;
362     }
363
364     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
365                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
366
367     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
368     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
369     i965_CreateSurfaces(ctx,
370                         dys_width_16x,
371                         dys_height_16x,
372                         VA_RT_FORMAT_YUV420,
373                         1,
374                         &vp9_surface->dys_16x_surface_id);
375     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
376
377     if (!vp9_surface->dys_16x_surface_obj) {
378         return VA_STATUS_ERROR_ALLOCATION_FAILED;
379     }
380
381     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
382                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
383
384     return VA_STATUS_SUCCESS;
385 }
386
387 static VAStatus
388 gen9_vp9_allocate_resources(VADriverContextP ctx,
389                             struct encode_state *encode_state,
390                             struct intel_encoder_context *encoder_context,
391                             int allocate)
392 {
393     struct i965_driver_data *i965 = i965_driver_data(ctx);
394     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
395     struct gen9_vp9_state *vp9_state;
396     int allocate_flag, i;
397     int res_size;
398     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
399     unsigned int width, height;
400
401     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
402
403     if (!vp9_state || !vp9_state->pic_param)
404         return VA_STATUS_ERROR_INVALID_PARAMETER;
405
406     /* the buffer related with BRC is not changed. So it is allocated
407      * based on the input parameter
408      */
409     if (allocate) {
410         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
411         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
412         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
413         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
414         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
415         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
416         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
417         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
418         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
419         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
420         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
421
422         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
423                                                    &vme_context->res_brc_history_buffer,
424                                                    VP9_BRC_HISTORY_BUFFER_SIZE,
425                                                    "Brc History buffer");
426         if (!allocate_flag)
427             goto failed_allocation;
428         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
429                                                    &vme_context->res_brc_const_data_buffer,
430                                                    VP9_BRC_CONSTANTSURFACE_SIZE,
431                                                    "Brc Constant buffer");
432         if (!allocate_flag)
433             goto failed_allocation;
434
435         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
436                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
437         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
438                                                    &vme_context->res_brc_mbenc_curbe_write_buffer,
439                                                    res_size,
440                                                    "Brc Curbe write");
441         if (!allocate_flag)
442             goto failed_allocation;
443
444         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
445         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446                                                    &vme_context->res_pic_state_brc_read_buffer,
447                                                    res_size,
448                                                    "Pic State Brc_read");
449         if (!allocate_flag)
450             goto failed_allocation;
451
452         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
453         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
454                                                    &vme_context->res_pic_state_brc_write_hfw_read_buffer,
455                                                    res_size,
456                                                    "Pic State Brc_write Hfw_Read");
457         if (!allocate_flag)
458             goto failed_allocation;
459
460         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
461         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
462                                                    &vme_context->res_pic_state_hfw_write_buffer,
463                                                    res_size,
464                                                    "Pic State Hfw Write");
465         if (!allocate_flag)
466             goto failed_allocation;
467
468         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
469         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
470                                                    &vme_context->res_seg_state_brc_read_buffer,
471                                                    res_size,
472                                                    "Segment state brc_read");
473         if (!allocate_flag)
474             goto failed_allocation;
475
476         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
477         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
478                                                    &vme_context->res_seg_state_brc_write_buffer,
479                                                    res_size,
480                                                    "Segment state brc_write");
481         if (!allocate_flag)
482             goto failed_allocation;
483
484         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
485         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
486                                                    &vme_context->res_brc_bitstream_size_buffer,
487                                                    res_size,
488                                                    "Brc bitstream buffer");
489         if (!allocate_flag)
490             goto failed_allocation;
491
492         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
493         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
494                                                    &vme_context->res_brc_hfw_data_buffer,
495                                                    res_size,
496                                                    "mfw Brc data");
497         if (!allocate_flag)
498             goto failed_allocation;
499
500         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
501         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
502                                                    &vme_context->res_brc_mmdk_pak_buffer,
503                                                    res_size,
504                                                    "Brc mmdk_pak");
505         if (!allocate_flag)
506             goto failed_allocation;
507     }
508
509     /* If the width/height of allocated buffer is greater than the expected,
510      * it is unnecessary to allocate it again
511      */
512     if (vp9_state->res_width >= vp9_state->frame_width &&
513         vp9_state->res_height >= vp9_state->frame_height) {
514
515         return VA_STATUS_SUCCESS;
516     }
517     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
518     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
519     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
520
521     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
522     res_size = frame_width_in_sb * 64;
523     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
524                                                &vme_context->res_hvd_line_buffer,
525                                                res_size,
526                                                "VP9 hvd line line");
527     if (!allocate_flag)
528         goto failed_allocation;
529
530     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
531     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
532                                                &vme_context->res_hvd_tile_line_buffer,
533                                                res_size,
534                                                "VP9 hvd tile_line line");
535     if (!allocate_flag)
536         goto failed_allocation;
537
538     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
539     res_size = frame_width_in_sb * 18 * 64;
540     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541                                                &vme_context->res_deblocking_filter_line_buffer,
542                                                res_size,
543                                                "VP9 deblocking filter line");
544     if (!allocate_flag)
545         goto failed_allocation;
546
547     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
548     res_size = frame_width_in_sb * 18 * 64;
549     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
550                                                &vme_context->res_deblocking_filter_tile_line_buffer,
551                                                res_size,
552                                                "VP9 deblocking tile line");
553     if (!allocate_flag)
554         goto failed_allocation;
555
556     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
557     res_size = frame_height_in_sb * 17 * 64;
558     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
559                                                &vme_context->res_deblocking_filter_tile_col_buffer,
560                                                res_size,
561                                                "VP9 deblocking tile col");
562     if (!allocate_flag)
563         goto failed_allocation;
564
565     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
566     res_size = frame_width_in_sb * 5 * 64;
567     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
568                                                &vme_context->res_metadata_line_buffer,
569                                                res_size,
570                                                "VP9 metadata line");
571     if (!allocate_flag)
572         goto failed_allocation;
573
574     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
575     res_size = frame_width_in_sb * 5 * 64;
576     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
577                                                &vme_context->res_metadata_tile_line_buffer,
578                                                res_size,
579                                                "VP9 metadata tile line");
580     if (!allocate_flag)
581         goto failed_allocation;
582
583     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
584     res_size = frame_height_in_sb * 5 * 64;
585     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
586                                                &vme_context->res_metadata_tile_col_buffer,
587                                                res_size,
588                                                "VP9 metadata tile col");
589     if (!allocate_flag)
590         goto failed_allocation;
591
592     i965_free_gpe_resource(&vme_context->res_prob_buffer);
593     res_size = 2048;
594     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
595                                                &vme_context->res_prob_buffer,
596                                                res_size,
597                                                "VP9 prob");
598     if (!allocate_flag)
599         goto failed_allocation;
600
601     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
602     res_size = frame_sb_num * 64;
603     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
604                                                &vme_context->res_segmentid_buffer,
605                                                res_size,
606                                                "VP9 segment id");
607     if (!allocate_flag)
608         goto failed_allocation;
609
610     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
611
612     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
613     res_size = 29 * 64;
614     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
615                                                &vme_context->res_prob_delta_buffer,
616                                                res_size,
617                                                "VP9 prob delta");
618     if (!allocate_flag)
619         goto failed_allocation;
620
621     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
622
623     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
624     res_size = 29 * 64;
625     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
626                                                &vme_context->res_prob_delta_buffer,
627                                                res_size,
628                                                "VP9 prob delta");
629     if (!allocate_flag)
630         goto failed_allocation;
631
632     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
633     res_size = 32 * 64;
634     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
635                                                &vme_context->res_compressed_input_buffer,
636                                                res_size,
637                                                "VP9 compressed_input buffer");
638     if (!allocate_flag)
639         goto failed_allocation;
640
641     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
642     res_size = 193 * 64;
643     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
644                                                &vme_context->res_prob_counter_buffer,
645                                                res_size,
646                                                "VP9 prob counter");
647     if (!allocate_flag)
648         goto failed_allocation;
649
650     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
651     res_size = frame_sb_num * 64;
652     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
653                                                &vme_context->res_tile_record_streamout_buffer,
654                                                res_size,
655                                                "VP9 tile record stream_out");
656     if (!allocate_flag)
657         goto failed_allocation;
658
659     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
660     res_size = frame_sb_num * 64;
661     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
662                                                &vme_context->res_cu_stat_streamout_buffer,
663                                                res_size,
664                                                "VP9 CU stat stream_out");
665     if (!allocate_flag)
666         goto failed_allocation;
667
668     width = vp9_state->downscaled_width_4x_in_mb * 32;
669     height = vp9_state->downscaled_height_4x_in_mb * 16;
670     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
671     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
672                                                   &vme_context->s4x_memv_data_buffer,
673                                                   width, height,
674                                                   ALIGN(width, 64),
675                                                   "VP9 4x MEMV data");
676     if (!allocate_flag)
677         goto failed_allocation;
678
679     width = vp9_state->downscaled_width_4x_in_mb * 8;
680     height = vp9_state->downscaled_height_4x_in_mb * 16;
681     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
682     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
683                                                   &vme_context->s4x_memv_distortion_buffer,
684                                                   width, height,
685                                                   ALIGN(width, 64),
686                                                   "VP9 4x MEMV distorion");
687     if (!allocate_flag)
688         goto failed_allocation;
689
690     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
691     height = vp9_state->downscaled_height_16x_in_mb * 16;
692     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
693     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
694                                                   &vme_context->s16x_memv_data_buffer,
695                                                   width, height,
696                                                   width,
697                                                   "VP9 16x MEMV data");
698     if (!allocate_flag)
699         goto failed_allocation;
700
701     width = vp9_state->frame_width_in_mb * 16;
702     height = vp9_state->frame_height_in_mb * 8;
703     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
704     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
705                                                   &vme_context->res_output_16x16_inter_modes,
706                                                   width, height,
707                                                   ALIGN(width, 64),
708                                                   "VP9 output inter_mode");
709     if (!allocate_flag)
710         goto failed_allocation;
711
712     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
713                16 * 4;
714     for (i = 0; i < 2; i++) {
715         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
716         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
717                                                    &vme_context->res_mode_decision[i],
718                                                    res_size,
719                                                    "VP9 mode decision");
720         if (!allocate_flag)
721             goto failed_allocation;
722
723     }
724
725     res_size = frame_sb_num * 9 * 64;
726     for (i = 0; i < 2; i++) {
727         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
728         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
729                                                    &vme_context->res_mv_temporal_buffer[i],
730                                                    res_size,
731                                                    "VP9 temporal mv");
732         if (!allocate_flag)
733             goto failed_allocation;
734     }
735
736     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
737     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
738     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
739     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
740                                                &vme_context->res_mb_code_surface,
741                                                ALIGN(res_size, 4096),
742                                                "VP9 mb_code surface");
743     if (!allocate_flag)
744         goto failed_allocation;
745
746     res_size = 128;
747     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
748     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
749                                                &vme_context->res_pak_uncompressed_input_buffer,
750                                                ALIGN(res_size, 4096),
751                                                "VP9 pak_uncompressed_input");
752     if (!allocate_flag)
753         goto failed_allocation;
754
755     if (!vme_context->frame_header_data) {
756         /* allocate 512 bytes for generating the uncompressed header */
757         vme_context->frame_header_data = calloc(1, 512);
758     }
759
760     vp9_state->res_width = vp9_state->frame_width;
761     vp9_state->res_height = vp9_state->frame_height;
762
763     return VA_STATUS_SUCCESS;
764
765 failed_allocation:
766     return VA_STATUS_ERROR_ALLOCATION_FAILED;
767 }
768
769 static void
770 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
771 {
772     int i;
773     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
774
775     if (vp9_state->brc_enabled) {
776         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
777         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
778         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
779         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
780         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
781         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
782         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
783         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
784         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
785         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
786         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
787     }
788
789     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
790     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
791     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
792     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
793     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
794     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
795     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
796     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
797     i965_free_gpe_resource(&vme_context->res_prob_buffer);
798     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
799     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
800     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
801     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
802     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
803     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
804     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
805     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
806     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
807     for (i = 0; i < 2; i++) {
808         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
809     }
810
811     for (i = 0; i < 2; i++) {
812         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
813     }
814
815     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
816     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
817     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
818
819     if (vme_context->frame_header_data) {
820         free(vme_context->frame_header_data);
821         vme_context->frame_header_data = NULL;
822     }
823     return;
824 }
825
826 static void
827 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
828                                         struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
829                                         struct gpe_media_object_walker_parameter *walker_param)
830 {
831     memset(walker_param, 0, sizeof(*walker_param));
832
833     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
834
835     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
836     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
837
838     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
839     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
840
841     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
842     walker_param->global_outer_loop_stride.y = 0;
843
844     walker_param->global_inner_loop_unit.x = 0;
845     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
846
847     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
848     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
849
850     if (kernel_walker_param->no_dependency) {
851         walker_param->scoreboard_mask = 0;
852         walker_param->use_scoreboard = 0;
853         // Raster scan walking pattern
854         walker_param->local_outer_loop_stride.x = 0;
855         walker_param->local_outer_loop_stride.y = 1;
856         walker_param->local_inner_loop_unit.x = 1;
857         walker_param->local_inner_loop_unit.y = 0;
858         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
859         walker_param->local_end.y = 0;
860     } else {
861         walker_param->local_end.x = 0;
862         walker_param->local_end.y = 0;
863
864         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
865             // 45z degree
866             walker_param->scoreboard_mask = 0x0F;
867
868             walker_param->global_loop_exec_count = 0x3FF;
869             walker_param->local_loop_exec_count = 0x3FF;
870
871             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
872             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
873
874             walker_param->global_start.x = 0;
875             walker_param->global_start.y = 0;
876
877             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
878             walker_param->global_outer_loop_stride.y = 0;
879
880             walker_param->global_inner_loop_unit.x = 0;
881             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
882
883             walker_param->block_resolution.x = walker_param->global_resolution.x;
884             walker_param->block_resolution.y = walker_param->global_resolution.y;
885
886             walker_param->local_start.x = 0;
887             walker_param->local_start.y = 0;
888
889             walker_param->local_outer_loop_stride.x = 1;
890             walker_param->local_outer_loop_stride.y = 0;
891
892             walker_param->local_inner_loop_unit.x = -1;
893             walker_param->local_inner_loop_unit.y = 4;
894
895             walker_param->middle_loop_extra_steps = 3;
896             walker_param->mid_loop_unit_x = 0;
897             walker_param->mid_loop_unit_y = 1;
898         } else {
899             // 26 degree
900             walker_param->scoreboard_mask = 0x0F;
901             walker_param->local_outer_loop_stride.x = 1;
902             walker_param->local_outer_loop_stride.y = 0;
903             walker_param->local_inner_loop_unit.x = -2;
904             walker_param->local_inner_loop_unit.y = 1;
905         }
906     }
907 }
908
909 static void
910 gen9_run_kernel_media_object(VADriverContextP ctx,
911                              struct intel_encoder_context *encoder_context,
912                              struct i965_gpe_context *gpe_context,
913                              int media_function,
914                              struct gpe_media_object_parameter *param)
915 {
916     struct intel_batchbuffer *batch = encoder_context->base.batch;
917     struct vp9_encode_status_buffer_internal *status_buffer;
918     struct gen9_vp9_state *vp9_state;
919     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
920
921     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
922     if (!vp9_state || !batch)
923         return;
924
925     intel_batchbuffer_start_atomic(batch, 0x1000);
926
927     status_buffer = &(vp9_state->status_buffer);
928     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
929     mi_store_data_imm.bo = status_buffer->bo;
930     mi_store_data_imm.offset = status_buffer->media_index_offset;
931     mi_store_data_imm.dw0 = media_function;
932     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
933
934     intel_batchbuffer_emit_mi_flush(batch);
935     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
936     gen8_gpe_media_object(ctx, gpe_context, batch, param);
937     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
938
939     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
940
941     intel_batchbuffer_end_atomic(batch);
942
943     intel_batchbuffer_flush(batch);
944 }
945
946 static void
947 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
948                                     struct intel_encoder_context *encoder_context,
949                                     struct i965_gpe_context *gpe_context,
950                                     int media_function,
951                                     struct gpe_media_object_walker_parameter *param)
952 {
953     struct intel_batchbuffer *batch = encoder_context->base.batch;
954     struct vp9_encode_status_buffer_internal *status_buffer;
955     struct gen9_vp9_state *vp9_state;
956     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
957
958     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
959     if (!vp9_state || !batch)
960         return;
961
962     intel_batchbuffer_start_atomic(batch, 0x1000);
963
964     intel_batchbuffer_emit_mi_flush(batch);
965
966     status_buffer = &(vp9_state->status_buffer);
967     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
968     mi_store_data_imm.bo = status_buffer->bo;
969     mi_store_data_imm.offset = status_buffer->media_index_offset;
970     mi_store_data_imm.dw0 = media_function;
971     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
972
973     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
974     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
975     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
976
977     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
978
979     intel_batchbuffer_end_atomic(batch);
980
981     intel_batchbuffer_flush(batch);
982 }
983
984 static
985 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
986                             struct encode_state *encode_state,
987                             struct i965_gpe_context *gpe_context,
988                             struct intel_encoder_context *encoder_context,
989                             struct gen9_vp9_brc_curbe_param *param)
990 {
991     VAEncSequenceParameterBufferVP9 *seq_param;
992     VAEncPictureParameterBufferVP9  *pic_param;
993     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
994     vp9_brc_curbe_data      *cmd;
995     double                  dbps_ratio, dInputBitsPerFrame;
996     struct gen9_vp9_state *vp9_state;
997
998     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
999
1000     pic_param      = param->ppic_param;
1001     seq_param      = param->pseq_param;
1002     segment_param  = param->psegment_param;
1003
1004     cmd = i965_gpe_context_map_curbe(gpe_context);
1005
1006     if (!cmd)
1007         return;
1008
1009     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1010
1011     if (!vp9_state->dys_enabled) {
1012         cmd->dw0.frame_width  = pic_param->frame_width_src;
1013         cmd->dw0.frame_height = pic_param->frame_height_src;
1014     } else {
1015         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1016         cmd->dw0.frame_height = pic_param->frame_height_dst;
1017     }
1018
1019     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1020     cmd->dw1.segmentation_enable  = 0;
1021     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1022     cmd->dw1.num_tlevels          = 1;
1023
1024     switch (param->media_state_type) {
1025     case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1026         cmd->dw3.max_level_ratiot0 = 0;
1027         cmd->dw3.max_level_ratiot1 = 0;
1028         cmd->dw3.max_level_ratiot2 = 0;
1029         cmd->dw3.max_level_ratiot3 = 0;
1030
1031         cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1032                                               seq_param->max_frame_height;
1033         cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1034         cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1035         cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1036                                              VP9_BRC_KBPS;
1037         cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1038                                           VP9_BRC_KBPS;
1039         cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1040                                           VP9_BRC_KBPS;
1041         cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1042         cmd->dw11.frame_rated           = vp9_state->framerate.den;
1043
1044         cmd->dw14.avbr_accuracy         = 30;
1045         cmd->dw14.avbr_convergence      = 150;
1046
1047         if (encoder_context->rate_control_mode == VA_RC_CBR) {
1048             cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1049             cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1050             cmd->dw9.min_bit_rate  = 0;
1051         } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1052             cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1053         } else {
1054             cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1055             cmd->dw16.cq_level = 30;
1056         }
1057         cmd->dw12.gopp = seq_param->intra_period - 1;
1058
1059         cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1060         cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1061
1062         cmd->dw15.min_qp          = 1;
1063         cmd->dw15.max_qp          = 255;
1064
1065         cmd->dw16.cq_level            = 30;
1066
1067         cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1068         cmd->dw17.brc_overshoot_cbr_pct = 150;
1069
1070         dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1071         dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1072         if (dbps_ratio < 0.1)
1073             dbps_ratio = 0.1;
1074         if (dbps_ratio > 3.5)
1075             dbps_ratio = 3.5;
1076
1077         *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1078         *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1079         *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
1080
1081         cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1082         cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1083         cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1084         cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1085         cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1086         cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1087         cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1088         cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1089
1090         cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1091         cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1092         cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1093         cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1094         cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1095         cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1096         cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1097         cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1098
1099         cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1100         cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1101         cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1102         cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1103         cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1104         cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1105         cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1106         cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1107
1108         break;
1109     }
1110     case VP9_MEDIA_STATE_BRC_UPDATE: {
1111         cmd->dw15.min_qp          = 1;
1112         cmd->dw15.max_qp          = 255;
1113
1114         cmd->dw25.frame_number    = param->frame_number;
1115
1116         // Used in dynamic scaling. set to zero for now
1117         cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1118         cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1119
1120         if (pic_param->pic_flags.bits.segmentation_enabled) {
1121             cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1122             cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1123             cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1124             cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1125
1126             cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1127             cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1128             cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1129             cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1130         }
1131
1132         //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1133         cmd->dw34.temporal_id                = 0;
1134         cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1135
1136         cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1137         cmd->dw35.sync_async                 = 0;
1138         cmd->dw35.mbrc                       = param->mbbrc_enabled;
1139         if (*param->pbrc_init_current_target_buf_full_in_bits >
1140             ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1141             *param->pbrc_init_current_target_buf_full_in_bits -=
1142                 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1143             cmd->dw35.overflow = 1;
1144         } else
1145             cmd->dw35.overflow = 0;
1146
1147         cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1148
1149         cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1150
1151         *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1152
1153         cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1154         cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1155         cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1156
1157         break;
1158     }
1159     case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1160         cmd->dw2.intra_mode_disable        = 0;
1161         break;
1162     default:
1163         break;
1164     }
1165
1166     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1167     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1168     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1169     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1170     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1171     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1172     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1173     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1174     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1175     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1176     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1177     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1178     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1179     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1180
1181     i965_gpe_context_unmap_curbe(gpe_context);
1182     return;
1183 }
1184
1185 static void
1186 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1187                                      struct encode_state *encode_state,
1188                                      struct intel_encoder_context *encoder_context,
1189                                      struct i965_gpe_context *gpe_context)
1190 {
1191     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1192
1193     i965_add_buffer_gpe_surface(ctx,
1194                                 gpe_context,
1195                                 &vme_context->res_brc_history_buffer,
1196                                 0,
1197                                 vme_context->res_brc_history_buffer.size,
1198                                 0,
1199                                 VP9_BTI_BRC_HISTORY_G9);
1200
1201     i965_add_buffer_2d_gpe_surface(ctx,
1202                                    gpe_context,
1203                                    &vme_context->s4x_memv_distortion_buffer,
1204                                    1,
1205                                    I965_SURFACEFORMAT_R8_UNORM,
1206                                    VP9_BTI_BRC_DISTORTION_G9);
1207 }
1208
1209 /* The function related with BRC */
1210 static VAStatus
1211 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1212                                struct encode_state *encode_state,
1213                                struct intel_encoder_context *encoder_context)
1214 {
1215     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1216     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1217     struct gpe_media_object_parameter media_object_param;
1218     struct i965_gpe_context *gpe_context;
1219     int gpe_index = VP9_BRC_INIT;
1220     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1221     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1222     VAEncPictureParameterBufferVP9 *pic_param;
1223     struct gen9_vp9_state *vp9_state;
1224
1225     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1226
1227     if (!vp9_state || !vp9_state->pic_param)
1228         return VA_STATUS_ERROR_INVALID_PARAMETER;
1229
1230     pic_param = vp9_state->pic_param;
1231
1232     if (vp9_state->brc_inited)
1233         gpe_index = VP9_BRC_RESET;
1234
1235     gpe_context = &brc_context->gpe_contexts[gpe_index];
1236
1237     gen8_gpe_context_init(ctx, gpe_context);
1238     gen9_gpe_reset_binding_table(ctx, gpe_context);
1239
1240     brc_initreset_curbe.media_state_type    = media_function;
1241     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1242     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1243     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1244     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1245     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1246     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1247     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1248         &vp9_state->brc_init_current_target_buf_full_in_bits;
1249     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1250         &vp9_state->brc_init_reset_buf_size_in_bits;
1251     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1252         &vp9_state->brc_init_reset_input_bits_per_frame;
1253     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1254     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1255     brc_initreset_curbe.mbbrc_enabled      = 0;
1256     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1257
1258     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1259                                    gpe_context,
1260                                    encoder_context,
1261                                    &brc_initreset_curbe);
1262
1263     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1264     gen8_gpe_setup_interface_data(ctx, gpe_context);
1265
1266     memset(&media_object_param, 0, sizeof(media_object_param));
1267     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1268
1269     return VA_STATUS_SUCCESS;
1270 }
1271
1272 static void
1273 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1274                                      struct encode_state *encode_state,
1275                                      struct intel_encoder_context *encoder_context,
1276                                      struct i965_gpe_context *gpe_context)
1277 {
1278     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1279
1280     struct object_surface *obj_surface;
1281     struct gen9_surface_vp9 *vp9_priv_surface;
1282
1283     /* sScaled4xSurface surface */
1284     obj_surface = encode_state->reconstructed_object;
1285
1286     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1287
1288     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1289     i965_add_2d_gpe_surface(ctx, gpe_context,
1290                             obj_surface,
1291                             0, 1,
1292                             I965_SURFACEFORMAT_R8_UNORM,
1293                             VP9_BTI_BRC_SRCY4X_G9
1294                            );
1295
1296     i965_add_adv_gpe_surface(ctx, gpe_context,
1297                              obj_surface,
1298                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1299
1300     i965_add_buffer_2d_gpe_surface(ctx,
1301                                    gpe_context,
1302                                    &vme_context->s4x_memv_distortion_buffer,
1303                                    1,
1304                                    I965_SURFACEFORMAT_R8_UNORM,
1305                                    VP9_BTI_BRC_DISTORTION_G9);
1306
1307     return;
1308 }
1309
1310 /* The function related with BRC */
1311 static VAStatus
1312 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1313                                struct encode_state *encode_state,
1314                                struct intel_encoder_context *encoder_context)
1315 {
1316     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1317     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1318     struct i965_gpe_context *gpe_context;
1319     int gpe_index = VP9_BRC_INTRA_DIST;
1320     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1321     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1322     VAEncPictureParameterBufferVP9 *pic_param;
1323     struct gen9_vp9_state *vp9_state;
1324     struct gpe_media_object_walker_parameter media_object_walker_param;
1325     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1326
1327     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1328
1329     if (!vp9_state || !vp9_state->pic_param)
1330         return VA_STATUS_ERROR_INVALID_PARAMETER;
1331
1332     pic_param = vp9_state->pic_param;
1333
1334     gpe_context = &brc_context->gpe_contexts[gpe_index];
1335
1336     gen8_gpe_context_init(ctx, gpe_context);
1337     gen9_gpe_reset_binding_table(ctx, gpe_context);
1338
1339     brc_intra_dist_curbe.media_state_type    = media_function;
1340     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1341     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1342     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1343     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1344     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1345     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1346     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1347         &vp9_state->brc_init_current_target_buf_full_in_bits;
1348     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1349         &vp9_state->brc_init_reset_buf_size_in_bits;
1350     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1351         &vp9_state->brc_init_reset_input_bits_per_frame;
1352     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1353     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1354     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1355     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1356
1357     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1358                                    gpe_context,
1359                                    encoder_context,
1360                                    &brc_intra_dist_curbe);
1361
1362     /* zero distortion buffer */
1363     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1364
1365     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1366     gen8_gpe_setup_interface_data(ctx, gpe_context);
1367
1368     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1369     kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1370     kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1371     kernel_walker_param.no_dependency = 1;
1372
1373     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1374
1375     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1376                                         gpe_context,
1377                                         media_function,
1378                                         &media_object_walker_param);
1379
1380     return VA_STATUS_SUCCESS;
1381 }
1382
1383 static void
1384 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1385                                          struct encode_state *encode_state,
1386                                          struct intel_encoder_context *encoder_context,
1387                                          struct i965_gpe_resource *gpe_resource)
1388 {
1389     struct gen9_vp9_state *vp9_state;
1390     VAEncPictureParameterBufferVP9 *pic_param;
1391     int frame_width_minus1, frame_height_minus1;
1392     int is_lossless = 0;
1393     int is_intra_only = 0;
1394     unsigned int last_frame_type;
1395     unsigned int ref_flags;
1396     unsigned int use_prev_frame_mvs, adapt_flag;
1397     struct gen9_surface_vp9 *vp9_surface = NULL;
1398     struct object_surface *obj_surface = NULL;
1399     uint32_t scale_h = 0;
1400     uint32_t scale_w = 0;
1401
1402     char *pdata;
1403     int i, j;
1404     unsigned int *cmd_ptr, cmd_value, tmp;
1405
1406     pdata = i965_map_gpe_resource(gpe_resource);
1407     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1408
1409     if (!vp9_state || !vp9_state->pic_param || !pdata)
1410         return;
1411
1412     pic_param = vp9_state->pic_param;
1413     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1414     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1415     if ((pic_param->luma_ac_qindex == 0) &&
1416         (pic_param->luma_dc_qindex_delta == 0) &&
1417         (pic_param->chroma_ac_qindex_delta == 0) &&
1418         (pic_param->chroma_dc_qindex_delta == 0))
1419         is_lossless = 1;
1420
1421     if (pic_param->pic_flags.bits.frame_type)
1422         is_intra_only = pic_param->pic_flags.bits.intra_only;
1423
1424     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1425
1426     use_prev_frame_mvs = 0;
1427     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1428         last_frame_type = 0;
1429         ref_flags = 0;
1430     } else {
1431         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1432                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1433                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1434                     );
1435         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1436             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1437             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1438             !pic_param->pic_flags.bits.intra_only &&
1439             vp9_state->vp9_last_frame.show_frame &&
1440             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1441              !vp9_state->vp9_last_frame.intra_only)
1442            )
1443             use_prev_frame_mvs = 1;
1444     }
1445     adapt_flag = 0;
1446     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1447         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1448         adapt_flag = 1;
1449
1450     for (i = 0; i < 4; i++) {
1451         uint32_t non_first_pass;
1452         non_first_pass = 1;
1453         if (i == 0)
1454             non_first_pass = 0;
1455
1456         cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1457
1458         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1459         *cmd_ptr++ = (frame_height_minus1 << 16 |
1460                       frame_width_minus1);
1461         /* dw2 */
1462         *cmd_ptr++ = (0 << 31 |  /* disable segment_in */
1463                       0 << 30 | /* disable segment_out */
1464                       is_lossless << 29 | /* loseless */
1465                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1466                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1467                       (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1468                       (pic_param->sharpness_level << 23) |
1469                       (pic_param->filter_level << 17) |
1470                       (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1471                       (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1472                       (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1473                       (last_frame_type << 13) |
1474                       (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1475                       (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1476                       (use_prev_frame_mvs) << 10 |
1477                       ref_flags |
1478                       (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1479                       (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1480                       (is_intra_only << 2) |
1481                       (adapt_flag << 1) |
1482                       (pic_param->pic_flags.bits.frame_type) << 0);
1483
1484         *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1485                       (0 << 24) | /* 8-bit depth */
1486                       (0 << 22) | /* only 420 format */
1487                       (0 << 0)  | /* sse statistics */
1488                       (pic_param->log2_tile_rows << 8) |
1489                       (pic_param->log2_tile_columns << 0));
1490
1491         /* dw4..6 */
1492         if (pic_param->pic_flags.bits.frame_type &&
1493             !pic_param->pic_flags.bits.intra_only) {
1494             for (j = 0; j < 3; j++) {
1495                 obj_surface = encode_state->reference_objects[j];
1496                 scale_w = 0;
1497                 scale_h = 0;
1498                 if (obj_surface && obj_surface->private_data) {
1499                     vp9_surface = obj_surface->private_data;
1500                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1501                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1502                     *cmd_ptr++ = (scale_w << 16 |
1503                                   scale_h);
1504                 } else
1505                     *cmd_ptr++ = 0;
1506             }
1507         } else {
1508             *cmd_ptr++ = 0;
1509             *cmd_ptr++ = 0;
1510             *cmd_ptr++ = 0;
1511         }
1512         /* dw7..9 */
1513         for (j = 0; j < 3; j++) {
1514             obj_surface = encode_state->reference_objects[j];
1515             vp9_surface = NULL;
1516
1517             if (obj_surface && obj_surface->private_data) {
1518                 vp9_surface = obj_surface->private_data;
1519                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1520                              (vp9_surface->frame_width - 1);
1521             } else
1522                 *cmd_ptr++ = 0;
1523         }
1524         /* dw10 */
1525         *cmd_ptr++ = 0;
1526         /* dw11 */
1527         *cmd_ptr++ = (1 << 1);
1528         *cmd_ptr++ = 0;
1529
1530         /* dw13 */
1531         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1532                       (0 << 24) | /* tail insertation */
1533                       (pic_param->luma_ac_qindex << 16) |
1534                       0 /* compressed header bin count */);
1535
1536         /* dw14 */
1537         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1538         cmd_value = (tmp << 16);
1539         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1540         cmd_value |= (tmp << 8);
1541         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1542         cmd_value |= tmp;
1543         *cmd_ptr++ = cmd_value;
1544
1545         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1546         cmd_value = tmp;
1547         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1548         cmd_value |= (tmp << 8);
1549         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1550         cmd_value |= (tmp << 16);
1551         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1552         cmd_value |= (tmp << 24);
1553         *cmd_ptr++ = cmd_value;
1554
1555         /* dw16 */
1556         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1557         cmd_value = tmp;
1558         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1559         cmd_value |= (tmp << 8);
1560         *cmd_ptr++ = cmd_value;
1561
1562         /* dw17 */
1563         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1564                      (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1565         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1566                      (vp9_state->frame_header.bit_offset_lf_level << 16);
1567
1568         /* dw19 */
1569         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1570                       non_first_pass << 16);
1571         /* dw20 */
1572         *cmd_ptr++ = (1 << 31) | (256);
1573
1574         /* dw21 */
1575         *cmd_ptr++ = (0 << 31) | 1;
1576
1577         /* dw22-dw24. Frame_delta_qindex_range */
1578         *cmd_ptr++ = 0;
1579         *cmd_ptr++ = 0;
1580         *cmd_ptr++ = 0;
1581
1582         /* dw25-26. frame_delta_lf_range */
1583         *cmd_ptr++ = 0;
1584         *cmd_ptr++ = 0;
1585
1586         /* dw27. frame_delta_lf_min */
1587         *cmd_ptr++ = 0;
1588
1589         /* dw28..30 */
1590         *cmd_ptr++ = 0;
1591         *cmd_ptr++ = 0;
1592         *cmd_ptr++ = 0;
1593
1594         /* dw31 */
1595         *cmd_ptr++ = (0 << 30) | 1;
1596         /* dw32 */
1597         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1598
1599         *cmd_ptr++ = 0;
1600         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1601     }
1602
1603     i965_unmap_gpe_resource(gpe_resource);
1604 }
1605
1606 static void
1607 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1608                                  struct encode_state *encode_state,
1609                                  struct intel_encoder_context *encoder_context,
1610                                  struct i965_gpe_context *brc_gpe_context,
1611                                  struct i965_gpe_context *mbenc_gpe_context)
1612 {
1613     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1614
1615     /* 0. BRC history buffer */
1616     i965_add_buffer_gpe_surface(ctx,
1617                                 brc_gpe_context,
1618                                 &vme_context->res_brc_history_buffer,
1619                                 0,
1620                                 vme_context->res_brc_history_buffer.size,
1621                                 0,
1622                                 VP9_BTI_BRC_HISTORY_G9);
1623
1624     /* 1. Constant data buffer */
1625     i965_add_buffer_gpe_surface(ctx,
1626                                 brc_gpe_context,
1627                                 &vme_context->res_brc_const_data_buffer,
1628                                 0,
1629                                 vme_context->res_brc_const_data_buffer.size,
1630                                 0,
1631                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1632
1633     /* 2. Distortion 2D surface buffer */
1634     i965_add_buffer_2d_gpe_surface(ctx,
1635                                    brc_gpe_context,
1636                                    &vme_context->s4x_memv_distortion_buffer,
1637                                    1,
1638                                    I965_SURFACEFORMAT_R8_UNORM,
1639                                    VP9_BTI_BRC_DISTORTION_G9);
1640
1641     /* 3. pak buffer */
1642     i965_add_buffer_gpe_surface(ctx,
1643                                 brc_gpe_context,
1644                                 &vme_context->res_brc_mmdk_pak_buffer,
1645                                 0,
1646                                 vme_context->res_brc_mmdk_pak_buffer.size,
1647                                 0,
1648                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1649     /* 4. Mbenc curbe input buffer */
1650     gen9_add_dri_buffer_gpe_surface(ctx,
1651                                     brc_gpe_context,
1652                                     mbenc_gpe_context->curbe.bo,
1653                                     0,
1654                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1655                                     mbenc_gpe_context->curbe.offset,
1656                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1657     /* 5. Mbenc curbe output buffer */
1658     gen9_add_dri_buffer_gpe_surface(ctx,
1659                                     brc_gpe_context,
1660                                     mbenc_gpe_context->curbe.bo,
1661                                     0,
1662                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1663                                     mbenc_gpe_context->curbe.offset,
1664                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1665
1666     /* 6. BRC_PIC_STATE read buffer */
1667     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1668                                 &vme_context->res_pic_state_brc_read_buffer,
1669                                 0,
1670                                 vme_context->res_pic_state_brc_read_buffer.size,
1671                                 0,
1672                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1673
1674     /* 7. BRC_PIC_STATE write buffer */
1675     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1676                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1677                                 0,
1678                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1679                                 0,
1680                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1681
1682     /* 8. SEGMENT_STATE read buffer */
1683     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1684                                 &vme_context->res_seg_state_brc_read_buffer,
1685                                 0,
1686                                 vme_context->res_seg_state_brc_read_buffer.size,
1687                                 0,
1688                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1689
1690     /* 9. SEGMENT_STATE write buffer */
1691     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1692                                 &vme_context->res_seg_state_brc_write_buffer,
1693                                 0,
1694                                 vme_context->res_seg_state_brc_write_buffer.size,
1695                                 0,
1696                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1697
1698     /* 10. Bitstream size buffer */
1699     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1700                                 &vme_context->res_brc_bitstream_size_buffer,
1701                                 0,
1702                                 vme_context->res_brc_bitstream_size_buffer.size,
1703                                 0,
1704                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1705
1706     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1707                                 &vme_context->res_brc_hfw_data_buffer,
1708                                 0,
1709                                 vme_context->res_brc_hfw_data_buffer.size,
1710                                 0,
1711                                 VP9_BTI_BRC_HFW_DATA_G9);
1712
1713     return;
1714 }
1715
1716 static VAStatus
1717 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1718                            struct encode_state *encode_state,
1719                            struct intel_encoder_context *encoder_context)
1720 {
1721     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1722     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1723     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1724     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1725     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1726     int mbenc_function;
1727     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1728     VAEncPictureParameterBufferVP9 *pic_param;
1729     struct gen9_vp9_state *vp9_state;
1730     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1731     struct gpe_media_object_parameter media_object_param;
1732
1733     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1734     if (!vp9_state || !vp9_state->pic_param)
1735         return VA_STATUS_ERROR_INVALID_PARAMETER;
1736
1737     pic_param = vp9_state->pic_param;
1738     // Setup VP9 MbEnc Curbe
1739     if (vp9_state->picture_coding_type) {
1740         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1741         mbenc_index = VP9_MBENC_IDX_INTER;
1742     } else {
1743         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1744         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1745     }
1746
1747     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1748
1749     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1750
1751     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1752     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1753     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1754     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1755     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1756     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1757     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1758     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1759     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1760     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1761     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1762     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1763     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1764     mbenc_curbe_param.media_state_type       = mbenc_function;
1765
1766     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1767                                      mbenc_gpe_context,
1768                                      encoder_context,
1769                                      &mbenc_curbe_param);
1770
1771     vp9_state->mbenc_curbe_set_in_brc_update = true;
1772
1773     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1774
1775     gen8_gpe_context_init(ctx, brc_gpe_context);
1776     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1777
1778     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1779
1780     // Setup BRC Update Curbe
1781     brc_update_curbe_param.media_state_type       = media_function;
1782     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1783     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1784     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1785     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1786     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1787     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1788     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1789     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1790     brc_update_curbe_param.b_used_ref             = 1;
1791     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1792     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1793     brc_update_curbe_param.mbbrc_enabled          = 0;
1794     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1795     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1796
1797     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1798         &vp9_state->brc_init_current_target_buf_full_in_bits;
1799     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1800         &vp9_state->brc_init_reset_buf_size_in_bits;
1801     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1802         &vp9_state->brc_init_reset_input_bits_per_frame;
1803
1804     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1805                                    brc_gpe_context,
1806                                    encoder_context,
1807                                    &brc_update_curbe_param);
1808
1809
1810     // Check if the constant data surface is present
1811     if (vp9_state->brc_constant_buffer_supported) {
1812         char *brc_const_buffer;
1813         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1814
1815         if (!brc_const_buffer)
1816             return VA_STATUS_ERROR_OPERATION_FAILED;
1817
1818         if (vp9_state->picture_coding_type)
1819             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1820                    sizeof(vp9_brc_const_data_p_g9));
1821         else
1822             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1823                    sizeof(vp9_brc_const_data_i_g9));
1824
1825         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1826     }
1827
1828     if (pic_param->pic_flags.bits.segmentation_enabled) {
1829         //reallocate the vme_state->mb_segment_map_surface
1830         /* this will be added later */
1831     }
1832
1833     {
1834         pic_param->filter_level = 0;
1835         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1836         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1837                                                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1838     }
1839
1840     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1841                                      encoder_context,
1842                                      brc_gpe_context,
1843                                      mbenc_gpe_context);
1844
1845     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1846     memset(&media_object_param, 0, sizeof(media_object_param));
1847     gen9_run_kernel_media_object(ctx, encoder_context,
1848                                  brc_gpe_context,
1849                                  media_function,
1850                                  &media_object_param);
1851     return VA_STATUS_SUCCESS;
1852 }
1853
1854 static
1855 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1856                            struct encode_state *encode_state,
1857                            struct i965_gpe_context *gpe_context,
1858                            struct intel_encoder_context *encoder_context,
1859                            struct gen9_vp9_me_curbe_param *param)
1860 {
1861     vp9_me_curbe_data        *me_cmd;
1862     int enc_media_state;
1863     int                                       me_mode;
1864     unsigned int                                       width, height;
1865     uint32_t                                  l0_ref_frames;
1866     uint32_t                                  scale_factor;
1867
1868     if (param->b16xme_enabled) {
1869         if (param->use_16x_me)
1870             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1871         else
1872             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1873     } else {
1874         me_mode = VP9_ENC_ME4X_ONLY;
1875     }
1876
1877     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1878         scale_factor = 16;
1879     else
1880         scale_factor = 4;
1881
1882     if (param->use_16x_me)
1883         enc_media_state = VP9_MEDIA_STATE_16X_ME;
1884     else
1885         enc_media_state = VP9_MEDIA_STATE_4X_ME;
1886
1887     me_cmd = i965_gpe_context_map_curbe(gpe_context);
1888
1889     if (!me_cmd)
1890         return;
1891
1892     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
1893
1894     me_cmd->dw1.max_num_mvs           = 0x10;
1895     me_cmd->dw1.bi_weight             = 0x00;
1896
1897     me_cmd->dw2.max_num_su            = 0x39;
1898     me_cmd->dw2.max_len_sp            = 0x39;
1899
1900     me_cmd->dw3.sub_mb_part_mask       = 0x77;
1901     me_cmd->dw3.inter_sad             = 0x00;
1902     me_cmd->dw3.intra_sad            = 0x00;
1903     me_cmd->dw3.bme_disable_fbr      = 0x01;
1904     me_cmd->dw3.sub_pel_mode         = 0x03;
1905
1906     width = param->frame_width / scale_factor;
1907     height = param->frame_height / scale_factor;
1908
1909     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
1910     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
1911
1912     me_cmd->dw5.ref_width            = 0x30;
1913     me_cmd->dw5.ref_height           = 0x28;
1914
1915     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1916         me_cmd->dw6.write_distortions = 0x01;
1917
1918     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1919     me_cmd->dw6.super_combine_dist    = 0x5;
1920     me_cmd->dw6.max_vmvr              = 0x7fc;
1921
1922     l0_ref_frames = (param->ref_frame_flag & 0x01) +
1923                     !!(param->ref_frame_flag & 0x02) +
1924                     !!(param->ref_frame_flag & 0x04);
1925     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1926     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
1927
1928     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1929     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1930
1931     me_cmd->dw15.mv_shift_factor        = 0x02;
1932
1933     {
1934         memcpy((void *)((char *)me_cmd + 64),
1935                vp9_diamond_ime_search_path_delta,
1936                sizeof(vp9_diamond_ime_search_path_delta));
1937     }
1938
1939
1940     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
1941     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1942     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
1943     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1944     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
1945     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
1946
1947     i965_gpe_context_unmap_curbe(gpe_context);
1948 }
1949
1950 static void
1951 gen9_vp9_send_me_surface(VADriverContextP ctx,
1952                          struct encode_state *encode_state,
1953                          struct i965_gpe_context *gpe_context,
1954                          struct intel_encoder_context *encoder_context,
1955                          struct gen9_vp9_me_surface_param *param)
1956 {
1957     struct i965_driver_data *i965 = i965_driver_data(ctx);
1958     struct object_surface *obj_surface;
1959     struct gen9_surface_vp9 *vp9_priv_surface;
1960     struct object_surface *input_surface;
1961     struct i965_gpe_resource *gpe_resource;
1962     int ref_bti;
1963
1964     obj_surface = SURFACE(param->curr_pic);
1965
1966     if (!obj_surface || !obj_surface->private_data)
1967         return;
1968
1969     vp9_priv_surface = obj_surface->private_data;
1970     if (param->use_16x_me) {
1971         gpe_resource = param->pres_16x_memv_data_buffer;
1972     } else {
1973         gpe_resource = param->pres_4x_memv_data_buffer;
1974     }
1975
1976     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1977                                    gpe_resource,
1978                                    1,
1979                                    I965_SURFACEFORMAT_R8_UNORM,
1980                                    VP9_BTI_ME_MV_DATA_SURFACE);
1981
1982     if (param->b16xme_enabled) {
1983         gpe_resource = param->pres_16x_memv_data_buffer;
1984         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1985                                        gpe_resource,
1986                                        1,
1987                                        I965_SURFACEFORMAT_R8_UNORM,
1988                                        VP9_BTI_16XME_MV_DATA_SURFACE);
1989     }
1990
1991     if (!param->use_16x_me) {
1992         gpe_resource = param->pres_me_brc_distortion_buffer;
1993
1994         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1995                                        gpe_resource,
1996                                        1,
1997                                        I965_SURFACEFORMAT_R8_UNORM,
1998                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
1999
2000         gpe_resource = param->pres_me_distortion_buffer;
2001
2002         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2003                                        gpe_resource,
2004                                        1,
2005                                        I965_SURFACEFORMAT_R8_UNORM,
2006                                        VP9_BTI_ME_DISTORTION_SURFACE);
2007     }
2008
2009     if (param->use_16x_me)
2010         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2011     else
2012         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2013
2014     i965_add_adv_gpe_surface(ctx, gpe_context,
2015                              input_surface,
2016                              VP9_BTI_ME_CURR_PIC_L0);
2017
2018     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2019
2020
2021     if (param->last_ref_pic) {
2022         obj_surface = param->last_ref_pic;
2023         vp9_priv_surface = obj_surface->private_data;
2024
2025         if (param->use_16x_me)
2026             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2027         else
2028             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2029
2030         if (param->dys_enabled &&
2031             ((vp9_priv_surface->frame_width != param->frame_width) ||
2032              (vp9_priv_surface->frame_height != param->frame_height))) {
2033             if (param->use_16x_me)
2034                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2035             else
2036                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2037         }
2038         i965_add_adv_gpe_surface(ctx, gpe_context,
2039                                  input_surface,
2040                                  ref_bti);
2041         i965_add_adv_gpe_surface(ctx, gpe_context,
2042                                  input_surface,
2043                                  ref_bti + 1);
2044         ref_bti += 2;
2045     }
2046
2047     if (param->golden_ref_pic) {
2048         obj_surface = param->golden_ref_pic;
2049         vp9_priv_surface = obj_surface->private_data;
2050
2051         if (param->use_16x_me)
2052             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2053         else
2054             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2055
2056         if (param->dys_enabled &&
2057             ((vp9_priv_surface->frame_width != param->frame_width) ||
2058              (vp9_priv_surface->frame_height != param->frame_height))) {
2059             if (param->use_16x_me)
2060                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2061             else
2062                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2063         }
2064
2065         i965_add_adv_gpe_surface(ctx, gpe_context,
2066                                  input_surface,
2067                                  ref_bti);
2068         i965_add_adv_gpe_surface(ctx, gpe_context,
2069                                  input_surface,
2070                                  ref_bti + 1);
2071         ref_bti += 2;
2072     }
2073
2074     if (param->alt_ref_pic) {
2075         obj_surface = param->alt_ref_pic;
2076         vp9_priv_surface = obj_surface->private_data;
2077
2078         if (param->use_16x_me)
2079             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2080         else
2081             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2082
2083         if (param->dys_enabled &&
2084             ((vp9_priv_surface->frame_width != param->frame_width) ||
2085              (vp9_priv_surface->frame_height != param->frame_height))) {
2086             if (param->use_16x_me)
2087                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2088             else
2089                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2090         }
2091         i965_add_adv_gpe_surface(ctx, gpe_context,
2092                                  input_surface,
2093                                  ref_bti);
2094         i965_add_adv_gpe_surface(ctx, gpe_context,
2095                                  input_surface,
2096                                  ref_bti + 1);
2097         ref_bti += 2;
2098     }
2099
2100     return;
2101 }
2102
2103 static
2104 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2105                               struct encode_state *encode_state,
2106                               struct intel_encoder_context *encoder_context,
2107                               struct i965_gpe_context *gpe_context,
2108                               int use_16x_me)
2109 {
2110     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2111     struct gen9_vp9_me_surface_param  me_surface_param;
2112     struct gen9_vp9_state *vp9_state;
2113
2114     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2115
2116     /* sScaled4xSurface surface */
2117     memset(&me_surface_param, 0, sizeof(me_surface_param));
2118     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2119     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2120     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2121     me_surface_param.curr_pic = vp9_state->curr_frame;
2122     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2123     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2124     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2125     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2126
2127     if (use_16x_me) {
2128         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2129         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2130     } else {
2131         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2132         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2133     }
2134     me_surface_param.frame_width  = vp9_state->frame_width;
2135     me_surface_param.frame_height  = vp9_state->frame_height;
2136
2137     me_surface_param.use_16x_me = use_16x_me;
2138     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2139     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2140
2141     vme_context->pfn_send_me_surface(ctx, encode_state,
2142                                      gpe_context,
2143                                      encoder_context,
2144                                      &me_surface_param);
2145     return;
2146 }
2147
2148 static VAStatus
2149 gen9_vp9_me_kernel(VADriverContextP ctx,
2150                    struct encode_state *encode_state,
2151                    struct intel_encoder_context *encoder_context,
2152                    int use_16x_me)
2153 {
2154     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2155     struct i965_gpe_context *gpe_context;
2156     int media_function;
2157     struct gen9_vp9_me_curbe_param me_curbe_param;
2158     struct gen9_vp9_state *vp9_state;
2159     struct gpe_media_object_walker_parameter media_object_walker_param;
2160     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2161
2162     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2163     if (!vp9_state || !vp9_state->pic_param)
2164         return VA_STATUS_ERROR_INVALID_PARAMETER;
2165
2166     if (use_16x_me)
2167         media_function = VP9_MEDIA_STATE_16X_ME;
2168     else
2169         media_function = VP9_MEDIA_STATE_4X_ME;
2170
2171     gpe_context = &(vme_context->me_context.gpe_context);
2172
2173     gen8_gpe_context_init(ctx, gpe_context);
2174     gen9_gpe_reset_binding_table(ctx, gpe_context);
2175
2176     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2177     me_curbe_param.ppic_param = vp9_state->pic_param;
2178     me_curbe_param.pseq_param = vp9_state->seq_param;
2179     me_curbe_param.frame_width = vp9_state->frame_width;
2180     me_curbe_param.frame_height = vp9_state->frame_height;
2181     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2182     me_curbe_param.use_16x_me = use_16x_me;
2183     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2184     vme_context->pfn_set_curbe_me(ctx, encode_state,
2185                                   gpe_context,
2186                                   encoder_context,
2187                                   &me_curbe_param);
2188
2189     gen9_me_add_surfaces_vp9(ctx, encode_state,
2190                              encoder_context,
2191                              gpe_context,
2192                              use_16x_me);
2193
2194     gen8_gpe_setup_interface_data(ctx, gpe_context);
2195
2196     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2197     if (use_16x_me) {
2198         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2199         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2200     } else {
2201         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2202         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2203     }
2204     kernel_walker_param.no_dependency = 1;
2205
2206     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2207
2208     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2209                                         gpe_context,
2210                                         media_function,
2211                                         &media_object_walker_param);
2212
2213     return VA_STATUS_SUCCESS;
2214 }
2215
2216 static void
2217 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2218                               struct encode_state *encode_state,
2219                               struct i965_gpe_context *gpe_context,
2220                               struct intel_encoder_context *encoder_context,
2221                               struct gen9_vp9_scaling_curbe_param *curbe_param)
2222 {
2223     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2224
2225     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2226
2227     if (!curbe_cmd)
2228         return;
2229
2230     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2231
2232     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2233     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2234
2235     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2236     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2237
2238
2239     curbe_cmd->dw6.enable_mb_variance_output = 0;
2240     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2241     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2242
2243     if (curbe_param->mb_variance_output_enabled ||
2244         curbe_param->mb_pixel_average_output_enabled) {
2245         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2246     }
2247
2248     i965_gpe_context_unmap_curbe(gpe_context);
2249     return;
2250 }
2251
2252 static void
2253 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2254                               struct encode_state *encode_state,
2255                               struct i965_gpe_context *gpe_context,
2256                               struct intel_encoder_context *encoder_context,
2257                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2258 {
2259     vp9_bti_scaling_offset *scaling_bti;
2260     unsigned int surface_format;
2261
2262     scaling_bti = scaling_surface_param->p_scaling_bti;
2263
2264     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2265         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2266     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2267         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2268     else
2269         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2270
2271     i965_add_2d_gpe_surface(ctx, gpe_context,
2272                             scaling_surface_param->input_surface,
2273                             0, 1, surface_format,
2274                             scaling_bti->scaling_frame_src_y);
2275
2276     i965_add_2d_gpe_surface(ctx, gpe_context,
2277                             scaling_surface_param->output_surface,
2278                             0, 1, surface_format,
2279                             scaling_bti->scaling_frame_dst_y);
2280
2281
2282     return;
2283 }
2284
2285 static VAStatus
2286 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2287                         struct encode_state *encode_state,
2288                         struct intel_encoder_context *encoder_context,
2289                         int use_16x_scaling)
2290 {
2291     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2292     struct i965_gpe_context *gpe_context;
2293     int media_function;
2294     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2295     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2296     struct gen9_vp9_state *vp9_state;
2297     VAEncPictureParameterBufferVP9  *pic_param;
2298     struct gpe_media_object_walker_parameter media_object_walker_param;
2299     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2300     struct object_surface *obj_surface;
2301     struct object_surface *input_surface, *output_surface;
2302     struct gen9_surface_vp9 *vp9_priv_surface;
2303     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2304     unsigned int input_frame_width, input_frame_height;
2305     unsigned int output_frame_width, output_frame_height;
2306
2307     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2308     if (!vp9_state || !vp9_state->pic_param)
2309         return VA_STATUS_ERROR_INVALID_PARAMETER;
2310
2311     pic_param = vp9_state->pic_param;
2312
2313     if (use_16x_scaling)
2314         media_function = VP9_MEDIA_STATE_16X_SCALING;
2315     else
2316         media_function = VP9_MEDIA_STATE_4X_SCALING;
2317
2318     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2319
2320     gen8_gpe_context_init(ctx, gpe_context);
2321     gen9_gpe_reset_binding_table(ctx, gpe_context);
2322
2323     obj_surface = encode_state->reconstructed_object;
2324     vp9_priv_surface = obj_surface->private_data;
2325
2326     if (use_16x_scaling) {
2327         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2328         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2329
2330         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2331         input_frame_width           = vp9_state->frame_width_4x;
2332         input_frame_height          = vp9_state->frame_height_4x;
2333
2334         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2335         output_frame_width          = vp9_state->frame_width_16x;
2336         output_frame_height         = vp9_state->frame_height_16x;
2337     } else {
2338         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2339         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2340
2341         if (vp9_state->dys_in_use &&
2342             ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2343              (pic_param->frame_height_src != pic_param->frame_height_dst)))
2344             input_surface               = vp9_priv_surface->dys_surface_obj;
2345         else
2346             input_surface               = encode_state->input_yuv_object;
2347
2348         input_frame_width           = vp9_state->frame_width;
2349         input_frame_height          = vp9_state->frame_height;
2350
2351         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2352         output_frame_width          = vp9_state->frame_width_4x;
2353         output_frame_height         = vp9_state->frame_height_4x;
2354     }
2355
2356     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2357
2358     scaling_curbe_param.input_picture_width  = input_frame_width;
2359     scaling_curbe_param.input_picture_height = input_frame_height;
2360
2361     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2362     scaling_curbe_param.use_32x_scaling = 0;
2363
2364     if (use_16x_scaling)
2365         scaling_curbe_param.mb_variance_output_enabled = 0;
2366     else
2367         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2368
2369     scaling_curbe_param.blk8x8_stat_enabled = 0;
2370
2371     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2372                                        gpe_context,
2373                                        encoder_context,
2374                                        &scaling_curbe_param);
2375
2376     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2377     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2378     scaling_surface_param.input_surface                      = input_surface;
2379     scaling_surface_param.input_frame_width                  = input_frame_width;
2380     scaling_surface_param.input_frame_height                 = input_frame_height;
2381
2382     scaling_surface_param.output_surface                     = output_surface;
2383     scaling_surface_param.output_frame_width                 = output_frame_width;
2384     scaling_surface_param.output_frame_height                = output_frame_height;
2385     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2386     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2387
2388     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2389                                           gpe_context,
2390                                           encoder_context,
2391                                           &scaling_surface_param);
2392
2393     gen8_gpe_setup_interface_data(ctx, gpe_context);
2394
2395     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2396     /* the scaling is based on 8x8 blk level */
2397     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2398     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2399     kernel_walker_param.no_dependency = 1;
2400
2401     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2402
2403     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2404                                         gpe_context,
2405                                         media_function,
2406                                         &media_object_walker_param);
2407
2408     return VA_STATUS_SUCCESS;
2409 }
2410
2411 static void
2412 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2413 {
2414     struct gen9_sampler_8x8_avs                *sampler_cmd;
2415
2416     if (!gpe_context)
2417         return;
2418
2419     dri_bo_map(gpe_context->sampler.bo, 1);
2420
2421     if (!gpe_context->sampler.bo->virtual)
2422         return;
2423
2424     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2425                   (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2426
2427     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2428
2429     sampler_cmd->dw0.r3c_coefficient                      = 15;
2430     sampler_cmd->dw0.r3x_coefficient                      = 6;
2431     sampler_cmd->dw0.strong_edge_threshold                = 8;
2432     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2433     sampler_cmd->dw0.gain_factor                          = 32;
2434
2435     sampler_cmd->dw2.r5c_coefficient                     = 3;
2436     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2437     sampler_cmd->dw2.r5x_coefficient                     = 9;
2438     sampler_cmd->dw2.strong_edge_weight                  = 6;
2439     sampler_cmd->dw2.regular_weight                      = 3;
2440     sampler_cmd->dw2.non_edge_weight                     = 2;
2441     sampler_cmd->dw2.global_noise_estimation             = 255;
2442
2443     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2444     sampler_cmd->dw3.cos_alpha                           = 79;
2445     sampler_cmd->dw3.sin_alpha                           = 101;
2446
2447     sampler_cmd->dw5.diamond_du                           = 0;
2448     sampler_cmd->dw5.hs_margin                            = 3;
2449     sampler_cmd->dw5.diamond_alpha                        = 100;
2450
2451     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2452
2453     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2454
2455     sampler_cmd->dw10.y_slope2                            = 24;
2456     sampler_cmd->dw10.s0l                                 = 1792;
2457
2458     sampler_cmd->dw12.y_slope1                            = 24;
2459
2460     sampler_cmd->dw14.s0u                                = 256;
2461
2462     sampler_cmd->dw15.s2u                                = 1792;
2463     sampler_cmd->dw15.s1u                                = 0;
2464
2465     memcpy(sampler_cmd->coefficients,
2466            &gen9_vp9_avs_coeffs[0],
2467            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2468
2469     sampler_cmd->dw152.default_sharpness_level     = 255;
2470     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2471     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2472     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2473     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2474
2475     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2476     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2477     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2478
2479     memcpy(sampler_cmd->extra_coefficients,
2480            &gen9_vp9_avs_coeffs[17 * 8],
2481            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2482
2483     dri_bo_unmap(gpe_context->sampler.bo);
2484 }
2485
2486 static void
2487 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2488                        struct encode_state *encode_state,
2489                        struct i965_gpe_context *gpe_context,
2490                        struct intel_encoder_context *encoder_context,
2491                        struct gen9_vp9_dys_curbe_param *curbe_param)
2492 {
2493     vp9_dys_curbe_data  *curbe_cmd;
2494
2495     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2496
2497     if (!curbe_cmd)
2498         return;
2499
2500     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2501
2502     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2503     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2504
2505     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2506     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2507
2508     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2509     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2510
2511     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2512     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2513     curbe_cmd->dw18.avs_sample_idx            = 0;
2514
2515     i965_gpe_context_unmap_curbe(gpe_context);
2516 }
2517
2518 static void
2519 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2520                           struct encode_state *encode_state,
2521                           struct i965_gpe_context *gpe_context,
2522                           struct intel_encoder_context *encoder_context,
2523                           struct gen9_vp9_dys_surface_param *surface_param)
2524 {
2525
2526     if (surface_param->input_frame)
2527         i965_add_adv_gpe_surface(ctx,
2528                                  gpe_context,
2529                                  surface_param->input_frame,
2530                                  VP9_BTI_DYS_INPUT_NV12);
2531
2532     if (surface_param->output_frame) {
2533         i965_add_2d_gpe_surface(ctx,
2534                                 gpe_context,
2535                                 surface_param->output_frame,
2536                                 0,
2537                                 1,
2538                                 I965_SURFACEFORMAT_R8_UNORM,
2539                                 VP9_BTI_DYS_OUTPUT_Y);
2540
2541         i965_add_2d_gpe_surface(ctx,
2542                                 gpe_context,
2543                                 surface_param->output_frame,
2544                                 1,
2545                                 1,
2546                                 I965_SURFACEFORMAT_R16_UINT,
2547                                 VP9_BTI_DYS_OUTPUT_UV);
2548     }
2549
2550     return;
2551 }
2552
2553 static VAStatus
2554 gen9_vp9_dys_kernel(VADriverContextP ctx,
2555                     struct encode_state *encode_state,
2556                     struct intel_encoder_context *encoder_context,
2557                     gen9_vp9_dys_kernel_param *dys_kernel_param)
2558 {
2559     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2560     struct i965_gpe_context *gpe_context;
2561     int media_function;
2562     struct gen9_vp9_dys_curbe_param                 curbe_param;
2563     struct gen9_vp9_dys_surface_param               surface_param;
2564     struct gpe_media_object_walker_parameter        media_object_walker_param;
2565     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
2566     unsigned int                                    resolution_x, resolution_y;
2567
2568     media_function = VP9_MEDIA_STATE_DYS;
2569     gpe_context = &vme_context->dys_context.gpe_context;
2570
2571     //gen8_gpe_context_init(ctx, gpe_context);
2572     gen9_gpe_reset_binding_table(ctx, gpe_context);
2573
2574     /* sampler state is configured only when initializing the GPE context */
2575
2576     memset(&curbe_param, 0, sizeof(curbe_param));
2577     curbe_param.input_width   = dys_kernel_param->input_width;
2578     curbe_param.input_height  = dys_kernel_param->input_height;
2579     curbe_param.output_width = dys_kernel_param->output_width;
2580     curbe_param.output_height = dys_kernel_param->output_height;
2581     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2582                                    gpe_context,
2583                                    encoder_context,
2584                                    &curbe_param);
2585
2586     // Add surface states
2587     memset(&surface_param, 0, sizeof(surface_param));
2588     surface_param.input_frame = dys_kernel_param->input_surface;
2589     surface_param.output_frame = dys_kernel_param->output_surface;
2590     surface_param.vert_line_stride = 0;
2591     surface_param.vert_line_stride_offset = 0;
2592
2593     vme_context->pfn_send_dys_surface(ctx,
2594                                       encode_state,
2595                                       gpe_context,
2596                                       encoder_context,
2597                                       &surface_param);
2598
2599     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2600     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2601
2602     gen8_gpe_setup_interface_data(ctx, gpe_context);
2603
2604     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2605     kernel_walker_param.resolution_x = resolution_x;
2606     kernel_walker_param.resolution_y = resolution_y;
2607     kernel_walker_param.no_dependency = 1;
2608
2609     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2610
2611     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2612                                         gpe_context,
2613                                         media_function,
2614                                         &media_object_walker_param);
2615
2616     return VA_STATUS_SUCCESS;
2617 }
2618
2619 static VAStatus
2620 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2621                            struct encode_state *encode_state,
2622                            struct intel_encoder_context *encoder_context)
2623 {
2624     struct gen9_vp9_state *vp9_state;
2625     VAEncPictureParameterBufferVP9  *pic_param;
2626     gen9_vp9_dys_kernel_param dys_kernel_param;
2627     struct object_surface *obj_surface;
2628     struct object_surface *input_surface, *output_surface;
2629     struct gen9_surface_vp9 *vp9_priv_surface;
2630
2631     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2632
2633     if (!vp9_state || !vp9_state->pic_param)
2634         return VA_STATUS_ERROR_INVALID_PARAMETER;
2635
2636     pic_param = vp9_state->pic_param;
2637
2638     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2639         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2640         input_surface = encode_state->input_yuv_object;
2641         obj_surface = encode_state->reconstructed_object;
2642         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2643         output_surface = vp9_priv_surface->dys_surface_obj;
2644
2645         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2646         dys_kernel_param.input_width = pic_param->frame_width_src;
2647         dys_kernel_param.input_height = pic_param->frame_height_src;
2648         dys_kernel_param.input_surface = input_surface;
2649         dys_kernel_param.output_width = pic_param->frame_width_dst;
2650         dys_kernel_param.output_height = pic_param->frame_height_dst;
2651         dys_kernel_param.output_surface = output_surface;
2652         gen9_vp9_dys_kernel(ctx, encode_state,
2653                             encoder_context,
2654                             &dys_kernel_param);
2655     }
2656
2657     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2658         vp9_state->last_ref_obj) {
2659         obj_surface = vp9_state->last_ref_obj;
2660         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2661
2662         input_surface = obj_surface;
2663         output_surface = vp9_priv_surface->dys_surface_obj;
2664
2665         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2666         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2667         dys_kernel_param.input_surface = input_surface;
2668
2669         dys_kernel_param.output_width = pic_param->frame_width_dst;
2670         dys_kernel_param.output_height = pic_param->frame_height_dst;
2671         dys_kernel_param.output_surface = output_surface;
2672
2673         gen9_vp9_dys_kernel(ctx, encode_state,
2674                             encoder_context,
2675                             &dys_kernel_param);
2676
2677         if (vp9_state->hme_enabled) {
2678             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2679             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2680             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2681
2682             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2683             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2684             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2685
2686             gen9_vp9_dys_kernel(ctx, encode_state,
2687                                 encoder_context,
2688                                 &dys_kernel_param);
2689
2690             /* Does it really need to do the 16x HME if the
2691              * resolution is different?
2692              * Maybe it should be restricted
2693              */
2694             if (vp9_state->b16xme_enabled) {
2695                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2696                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2697                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2698
2699                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2700                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2701                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2702
2703                 gen9_vp9_dys_kernel(ctx, encode_state,
2704                                     encoder_context,
2705                                     &dys_kernel_param);
2706             }
2707         }
2708     }
2709
2710     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2711         vp9_state->golden_ref_obj) {
2712         obj_surface = vp9_state->golden_ref_obj;
2713         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2714
2715         input_surface = obj_surface;
2716         output_surface = vp9_priv_surface->dys_surface_obj;
2717
2718         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2719         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2720         dys_kernel_param.input_surface = input_surface;
2721
2722         dys_kernel_param.output_width = pic_param->frame_width_dst;
2723         dys_kernel_param.output_height = pic_param->frame_height_dst;
2724         dys_kernel_param.output_surface = output_surface;
2725
2726         gen9_vp9_dys_kernel(ctx, encode_state,
2727                             encoder_context,
2728                             &dys_kernel_param);
2729
2730         if (vp9_state->hme_enabled) {
2731             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2732             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2733             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2734
2735             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2736             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2737             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2738
2739             gen9_vp9_dys_kernel(ctx, encode_state,
2740                                 encoder_context,
2741                                 &dys_kernel_param);
2742
2743             /* Does it really need to do the 16x HME if the
2744              * resolution is different?
2745              * Maybe it should be restricted
2746              */
2747             if (vp9_state->b16xme_enabled) {
2748                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2749                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2750                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2751
2752                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2753                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2754                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2755
2756                 gen9_vp9_dys_kernel(ctx, encode_state,
2757                                     encoder_context,
2758                                     &dys_kernel_param);
2759             }
2760         }
2761     }
2762
2763     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2764         vp9_state->alt_ref_obj) {
2765         obj_surface = vp9_state->alt_ref_obj;
2766         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2767
2768         input_surface = obj_surface;
2769         output_surface = vp9_priv_surface->dys_surface_obj;
2770
2771         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2772         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2773         dys_kernel_param.input_surface = input_surface;
2774
2775         dys_kernel_param.output_width = pic_param->frame_width_dst;
2776         dys_kernel_param.output_height = pic_param->frame_height_dst;
2777         dys_kernel_param.output_surface = output_surface;
2778
2779         gen9_vp9_dys_kernel(ctx, encode_state,
2780                             encoder_context,
2781                             &dys_kernel_param);
2782
2783         if (vp9_state->hme_enabled) {
2784             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2785             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2786             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2787
2788             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2789             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2790             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2791
2792             gen9_vp9_dys_kernel(ctx, encode_state,
2793                                 encoder_context,
2794                                 &dys_kernel_param);
2795
2796             /* Does it really need to do the 16x HME if the
2797              * resolution is different?
2798              * Maybe it should be restricted
2799              */
2800             if (vp9_state->b16xme_enabled) {
2801                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2802                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2803                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2804
2805                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2806                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2807                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2808
2809                 gen9_vp9_dys_kernel(ctx, encode_state,
2810                                     encoder_context,
2811                                     &dys_kernel_param);
2812             }
2813         }
2814     }
2815
2816     return VA_STATUS_SUCCESS;
2817 }
2818
2819 static void
2820 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2821                          struct encode_state *encode_state,
2822                          struct i965_gpe_context *gpe_context,
2823                          struct intel_encoder_context *encoder_context,
2824                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
2825 {
2826     struct gen9_vp9_state *vp9_state;
2827     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2828     vp9_mbenc_curbe_data  *curbe_cmd;
2829     VAEncPictureParameterBufferVP9  *pic_param;
2830     int i, segment_count;
2831     int seg_qindex;
2832     struct object_surface *obj_surface;
2833     struct gen9_surface_vp9 *vp9_priv_surface;
2834
2835     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2836
2837     if (!vp9_state || !vp9_state->pic_param)
2838         return;
2839
2840     pic_param = curbe_param->ppic_param;
2841     seg_param = curbe_param->psegment_param;
2842
2843     if (!seg_param) {
2844         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2845         seg_param = &tmp_seg_param;
2846     }
2847
2848     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2849
2850     if (!curbe_cmd)
2851         return;
2852
2853     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2854
2855     if (vp9_state->dys_in_use) {
2856         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2857         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2858     } else {
2859         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2860         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2861     }
2862
2863     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2864
2865     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2866     if (pic_param->pic_flags.bits.segmentation_enabled)
2867         segment_count = 8;
2868     else
2869         segment_count = 1;
2870
2871     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2872
2873     //right now set them to normal settings
2874     if (curbe_param->picture_coding_type) {
2875         switch (vp9_state->target_usage) {
2876         case INTEL_ENC_VP9_TU_QUALITY:
2877             curbe_cmd->dw1.min_16for32_check    = 0x00;
2878             curbe_cmd->dw2.multi_pred           = 0x02;
2879             curbe_cmd->dw2.len_sp               = 0x39;
2880             curbe_cmd->dw2.search_x             = 0x30;
2881             curbe_cmd->dw2.search_y             = 0x28;
2882             curbe_cmd->dw3.min_ref_for32_check = 0x01;
2883             curbe_cmd->dw4.skip16_threshold     = 0x000A;
2884             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2885
2886             memcpy(&curbe_cmd->dw16,
2887                    vp9_diamond_ime_search_path_delta,
2888                    14 * sizeof(unsigned int));
2889             break;
2890         case INTEL_ENC_VP9_TU_PERFORMANCE:
2891             curbe_cmd->dw1.min_16for32_check    = 0x02;
2892             curbe_cmd->dw2.multi_pred           = 0x00;
2893             curbe_cmd->dw2.len_sp               = 0x10;
2894             curbe_cmd->dw2.search_x             = 0x20;
2895             curbe_cmd->dw2.search_y             = 0x20;
2896             curbe_cmd->dw3.min_ref_for32_check = 0x03;
2897             curbe_cmd->dw4.skip16_threshold     = 0x0014;
2898             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2899
2900             memcpy(&curbe_cmd->dw16,
2901                    vp9_fullspiral_ime_search_path_delta,
2902                    14 * sizeof(unsigned int));
2903
2904             break;
2905         default:  // normal settings
2906             curbe_cmd->dw1.min_16for32_check     = 0x01;
2907             curbe_cmd->dw2.multi_pred           = 0x00;
2908             curbe_cmd->dw2.len_sp               = 0x19;
2909             curbe_cmd->dw2.search_x             = 0x30;
2910             curbe_cmd->dw2.search_y             = 0x28;
2911             curbe_cmd->dw3.min_ref_for32_check = 0x02;
2912             curbe_cmd->dw4.skip16_threshold     = 0x000F;
2913             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2914
2915             memcpy(&curbe_cmd->dw16,
2916                    vp9_diamond_ime_search_path_delta,
2917                    14 * sizeof(unsigned int));
2918             break;
2919         }
2920
2921         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
2922         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
2923         // co-located predictor must be disabled when dynamic scaling is enabled
2924         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
2925     }
2926
2927     curbe_cmd->dw5.inter_round = 0;
2928     curbe_cmd->dw5.intra_round = 4;
2929     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2930
2931     for (i = 0; i < segment_count; i++) {
2932         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2933                      + seg_param->seg_data[i].segment_qindex_delta;
2934
2935         seg_qindex = CLAMP(0, 255, seg_qindex);
2936
2937         if (curbe_param->picture_coding_type)
2938             memcpy(&curbe_cmd->segments[i],
2939                    &intel_vp9_costlut_p[seg_qindex * 16],
2940                    16 * sizeof(unsigned int));
2941         else
2942             memcpy(&curbe_cmd->segments[i],
2943                    &intel_vp9_costlut_key[seg_qindex * 16],
2944                    16 * sizeof(unsigned int));
2945     }
2946
2947     if (curbe_param->picture_coding_type) {
2948         if (curbe_cmd->dw3.multi_ref_qp_check) {
2949             if (curbe_param->ref_frame_flag & 0x01) {
2950                 obj_surface = curbe_param->last_ref_obj;
2951                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2952                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2953             }
2954
2955             if (curbe_param->ref_frame_flag & 0x02) {
2956                 obj_surface = curbe_param->golden_ref_obj;
2957                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2958                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2959             }
2960
2961             if (curbe_param->ref_frame_flag & 0x04) {
2962                 obj_surface = curbe_param->alt_ref_obj;
2963                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2964                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2965             }
2966         }
2967     }
2968     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
2969     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
2970     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2971     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2972     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
2973     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
2974     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
2975     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2976     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2977     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2978     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
2979     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
2980
2981     i965_gpe_context_unmap_curbe(gpe_context);
2982     return;
2983 }
2984
2985 static void
2986 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2987                             struct encode_state *encode_state,
2988                             struct i965_gpe_context *gpe_context,
2989                             struct intel_encoder_context *encoder_context,
2990                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
2991 {
2992     struct gen9_vp9_state *vp9_state;
2993     unsigned int            res_size;
2994     unsigned int            frame_width_in_sb, frame_height_in_sb;
2995     struct object_surface   *obj_surface, *tmp_input;
2996     struct gen9_surface_vp9 *vp9_priv_surface;
2997     int media_function;
2998
2999     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3000
3001     if (!vp9_state || !vp9_state->pic_param)
3002         return;
3003
3004     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3005     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3006     media_function = mbenc_param->media_state_type;
3007
3008     switch (media_function) {
3009     case VP9_MEDIA_STATE_MBENC_I_32x32: {
3010         obj_surface = mbenc_param->curr_frame_obj;
3011
3012         i965_add_2d_gpe_surface(ctx,
3013                                 gpe_context,
3014                                 obj_surface,
3015                                 0,
3016                                 1,
3017                                 I965_SURFACEFORMAT_R8_UNORM,
3018                                 VP9_BTI_MBENC_CURR_Y_G9);
3019
3020         i965_add_2d_gpe_surface(ctx,
3021                                 gpe_context,
3022                                 obj_surface,
3023                                 1,
3024                                 1,
3025                                 I965_SURFACEFORMAT_R16_UINT,
3026                                 VP9_BTI_MBENC_CURR_UV_G9);
3027
3028
3029         if (mbenc_param->segmentation_enabled) {
3030             i965_add_buffer_2d_gpe_surface(ctx,
3031                                            gpe_context,
3032                                            mbenc_param->pres_segmentation_map,
3033                                            1,
3034                                            I965_SURFACEFORMAT_R8_UNORM,
3035                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3036
3037         }
3038
3039         res_size = 16 * mbenc_param->frame_width_in_mb *
3040                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3041         i965_add_buffer_gpe_surface(ctx,
3042                                     gpe_context,
3043                                     mbenc_param->pres_mode_decision,
3044                                     0,
3045                                     res_size / 4,
3046                                     0,
3047                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3048
3049         break;
3050     }
3051     case VP9_MEDIA_STATE_MBENC_I_16x16: {
3052         obj_surface = mbenc_param->curr_frame_obj;
3053
3054         i965_add_2d_gpe_surface(ctx,
3055                                 gpe_context,
3056                                 obj_surface,
3057                                 0,
3058                                 1,
3059                                 I965_SURFACEFORMAT_R8_UNORM,
3060                                 VP9_BTI_MBENC_CURR_Y_G9);
3061
3062         i965_add_2d_gpe_surface(ctx,
3063                                 gpe_context,
3064                                 obj_surface,
3065                                 1,
3066                                 1,
3067                                 I965_SURFACEFORMAT_R16_UINT,
3068                                 VP9_BTI_MBENC_CURR_UV_G9);
3069
3070         i965_add_adv_gpe_surface(ctx, gpe_context,
3071                                  obj_surface,
3072                                  VP9_BTI_MBENC_CURR_NV12_G9);
3073
3074         if (mbenc_param->segmentation_enabled) {
3075             i965_add_buffer_2d_gpe_surface(ctx,
3076                                            gpe_context,
3077                                            mbenc_param->pres_segmentation_map,
3078                                            1,
3079                                            I965_SURFACEFORMAT_R8_UNORM,
3080                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3081
3082         }
3083
3084         res_size = 16 * mbenc_param->frame_width_in_mb *
3085                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3086         i965_add_buffer_gpe_surface(ctx,
3087                                     gpe_context,
3088                                     mbenc_param->pres_mode_decision,
3089                                     0,
3090                                     res_size / 4,
3091                                     0,
3092                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3093
3094         res_size = 160;
3095
3096         gen9_add_dri_buffer_gpe_surface(ctx,
3097                                         gpe_context,
3098                                         mbenc_param->gpe_context_tx->curbe.bo,
3099                                         0,
3100                                         ALIGN(res_size, 64),
3101                                         mbenc_param->gpe_context_tx->curbe.offset,
3102                                         VP9_BTI_MBENC_TX_CURBE_G9);
3103
3104         break;
3105     }
3106     case VP9_MEDIA_STATE_MBENC_P: {
3107         obj_surface = mbenc_param->curr_frame_obj;
3108
3109         i965_add_2d_gpe_surface(ctx,
3110                                 gpe_context,
3111                                 obj_surface,
3112                                 0,
3113                                 1,
3114                                 I965_SURFACEFORMAT_R8_UNORM,
3115                                 VP9_BTI_MBENC_CURR_Y_G9);
3116
3117         i965_add_2d_gpe_surface(ctx, gpe_context,
3118                                 obj_surface,
3119                                 1,
3120                                 1,
3121                                 I965_SURFACEFORMAT_R16_UINT,
3122                                 VP9_BTI_MBENC_CURR_UV_G9);
3123
3124         i965_add_adv_gpe_surface(ctx, gpe_context,
3125                                  obj_surface,
3126                                  VP9_BTI_MBENC_CURR_NV12_G9);
3127
3128         if (mbenc_param->last_ref_obj) {
3129             obj_surface = mbenc_param->last_ref_obj;
3130             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3131
3132             if (vp9_state->dys_in_use &&
3133                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3134                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3135                 tmp_input = vp9_priv_surface->dys_surface_obj;
3136             else
3137                 tmp_input = obj_surface;
3138
3139             i965_add_adv_gpe_surface(ctx, gpe_context,
3140                                      tmp_input,
3141                                      VP9_BTI_MBENC_LAST_NV12_G9);
3142
3143             i965_add_adv_gpe_surface(ctx, gpe_context,
3144                                      tmp_input,
3145                                      VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3146
3147         }
3148
3149         if (mbenc_param->golden_ref_obj) {
3150             obj_surface = mbenc_param->golden_ref_obj;
3151             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3152
3153             if (vp9_state->dys_in_use &&
3154                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3155                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3156                 tmp_input = vp9_priv_surface->dys_surface_obj;
3157             else
3158                 tmp_input = obj_surface;
3159
3160             i965_add_adv_gpe_surface(ctx, gpe_context,
3161                                      tmp_input,
3162                                      VP9_BTI_MBENC_GOLD_NV12_G9);
3163
3164             i965_add_adv_gpe_surface(ctx, gpe_context,
3165                                      tmp_input,
3166                                      VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3167
3168         }
3169
3170         if (mbenc_param->alt_ref_obj) {
3171             obj_surface = mbenc_param->alt_ref_obj;
3172             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3173
3174             if (vp9_state->dys_in_use &&
3175                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3176                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3177                 tmp_input = vp9_priv_surface->dys_surface_obj;
3178             else
3179                 tmp_input = obj_surface;
3180
3181             i965_add_adv_gpe_surface(ctx, gpe_context,
3182                                      tmp_input,
3183                                      VP9_BTI_MBENC_ALTREF_NV12_G9);
3184
3185             i965_add_adv_gpe_surface(ctx, gpe_context,
3186                                      tmp_input,
3187                                      VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3188
3189         }
3190
3191         if (mbenc_param->hme_enabled) {
3192             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3193                                            mbenc_param->ps4x_memv_data_buffer,
3194                                            1,
3195                                            I965_SURFACEFORMAT_R8_UNORM,
3196                                            VP9_BTI_MBENC_HME_MV_DATA_G9);
3197
3198             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3199                                            mbenc_param->ps4x_memv_distortion_buffer,
3200                                            1,
3201                                            I965_SURFACEFORMAT_R8_UNORM,
3202                                            VP9_BTI_MBENC_HME_DISTORTION_G9);
3203         }
3204
3205         if (mbenc_param->segmentation_enabled) {
3206             i965_add_buffer_2d_gpe_surface(ctx,
3207                                            gpe_context,
3208                                            mbenc_param->pres_segmentation_map,
3209                                            1,
3210                                            I965_SURFACEFORMAT_R8_UNORM,
3211                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3212
3213         }
3214
3215         res_size = 16 * mbenc_param->frame_width_in_mb *
3216                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3217         i965_add_buffer_gpe_surface(ctx,
3218                                     gpe_context,
3219                                     mbenc_param->pres_mode_decision_prev,
3220                                     0,
3221                                     res_size / 4,
3222                                     0,
3223                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3224
3225         i965_add_buffer_gpe_surface(ctx,
3226                                     gpe_context,
3227                                     mbenc_param->pres_mode_decision,
3228                                     0,
3229                                     res_size / 4,
3230                                     0,
3231                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3232
3233         i965_add_buffer_2d_gpe_surface(ctx,
3234                                        gpe_context,
3235                                        mbenc_param->pres_output_16x16_inter_modes,
3236                                        1,
3237                                        I965_SURFACEFORMAT_R8_UNORM,
3238                                        VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3239
3240         res_size = 160;
3241
3242         gen9_add_dri_buffer_gpe_surface(ctx,
3243                                         gpe_context,
3244                                         mbenc_param->gpe_context_tx->curbe.bo,
3245                                         0,
3246                                         ALIGN(res_size, 64),
3247                                         mbenc_param->gpe_context_tx->curbe.offset,
3248                                         VP9_BTI_MBENC_TX_CURBE_G9);
3249
3250
3251         break;
3252     }
3253     case VP9_MEDIA_STATE_MBENC_TX: {
3254         obj_surface = mbenc_param->curr_frame_obj;
3255
3256         i965_add_2d_gpe_surface(ctx,
3257                                 gpe_context,
3258                                 obj_surface,
3259                                 0,
3260                                 1,
3261                                 I965_SURFACEFORMAT_R8_UNORM,
3262                                 VP9_BTI_MBENC_CURR_Y_G9);
3263
3264         i965_add_2d_gpe_surface(ctx,
3265                                 gpe_context,
3266                                 obj_surface,
3267                                 1,
3268                                 1,
3269                                 I965_SURFACEFORMAT_R16_UINT,
3270                                 VP9_BTI_MBENC_CURR_UV_G9);
3271
3272         if (mbenc_param->segmentation_enabled) {
3273             i965_add_buffer_2d_gpe_surface(ctx,
3274                                            gpe_context,
3275                                            mbenc_param->pres_segmentation_map,
3276                                            1,
3277                                            I965_SURFACEFORMAT_R8_UNORM,
3278                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3279
3280         }
3281
3282         res_size = 16 * mbenc_param->frame_width_in_mb *
3283                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3284         i965_add_buffer_gpe_surface(ctx,
3285                                     gpe_context,
3286                                     mbenc_param->pres_mode_decision,
3287                                     0,
3288                                     res_size / 4,
3289                                     0,
3290                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3291
3292         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3293         i965_add_buffer_gpe_surface(ctx,
3294                                     gpe_context,
3295                                     mbenc_param->pres_mb_code_surface,
3296                                     0,
3297                                     res_size / 4,
3298                                     0,
3299                                     VP9_BTI_MBENC_PAK_DATA_G9);
3300
3301         // CU Record
3302         res_size = frame_width_in_sb * frame_height_in_sb *
3303                    64 * 16 * sizeof(unsigned int);
3304
3305         i965_add_buffer_gpe_surface(ctx,
3306                                     gpe_context,
3307                                     mbenc_param->pres_mb_code_surface,
3308                                     0,
3309                                     res_size / 4,
3310                                     mbenc_param->mb_data_offset,
3311                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3312     }
3313     default:
3314         break;
3315     }
3316
3317     return;
3318 }
3319
3320 static VAStatus
3321 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3322                       struct encode_state *encode_state,
3323                       struct intel_encoder_context *encoder_context,
3324                       int media_function)
3325 {
3326     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3327     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3328     struct gpe_media_object_walker_parameter        media_object_walker_param;
3329     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
3330     unsigned int    resolution_x, resolution_y;
3331     struct gen9_vp9_state *vp9_state;
3332     VAEncPictureParameterBufferVP9  *pic_param;
3333     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3334     struct gen9_vp9_mbenc_surface_param             surface_param;
3335     VAStatus    va_status = VA_STATUS_SUCCESS;
3336     int mbenc_gpe_index = 0;
3337     struct object_surface *obj_surface;
3338     struct gen9_surface_vp9 *vp9_priv_surface;
3339
3340     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3341
3342     if (!vp9_state || !vp9_state->pic_param)
3343         return VA_STATUS_ERROR_ENCODING_ERROR;
3344
3345     pic_param = vp9_state->pic_param;
3346
3347     switch (media_function) {
3348     case VP9_MEDIA_STATE_MBENC_I_32x32:
3349         mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3350         break;
3351
3352     case VP9_MEDIA_STATE_MBENC_I_16x16:
3353         mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3354         break;
3355
3356     case VP9_MEDIA_STATE_MBENC_P:
3357         mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3358         break;
3359
3360     case VP9_MEDIA_STATE_MBENC_TX:
3361         mbenc_gpe_index = VP9_MBENC_IDX_TX;
3362         break;
3363
3364     default:
3365         va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3366         return va_status;
3367     }
3368
3369     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3370     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3371
3372     gen9_gpe_reset_binding_table(ctx, gpe_context);
3373
3374     // Set curbe
3375     if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3376         if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3377             media_function == VP9_MEDIA_STATE_MBENC_P) {
3378             memset(&curbe_param, 0, sizeof(curbe_param));
3379             curbe_param.ppic_param            = vp9_state->pic_param;
3380             curbe_param.pseq_param            = vp9_state->seq_param;
3381             curbe_param.psegment_param        = vp9_state->segment_param;
3382             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3383             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3384             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3385             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3386             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3387             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3388             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3389             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3390             curbe_param.media_state_type      = media_function;
3391             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3392
3393             vme_context->pfn_set_curbe_mbenc(ctx,
3394                                              encode_state,
3395                                              gpe_context,
3396                                              encoder_context,
3397                                              &curbe_param);
3398         }
3399     }
3400
3401     memset(&surface_param, 0, sizeof(surface_param));
3402     surface_param.media_state_type             = media_function;
3403     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3404     surface_param.frame_width                  = vp9_state->frame_width;
3405     surface_param.frame_height                 = vp9_state->frame_height;
3406     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3407     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3408     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3409     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3410     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3411     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3412     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3413     surface_param.pres_mode_decision           =
3414         &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3415     surface_param.pres_mode_decision_prev      =
3416         &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3417     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3418     surface_param.pres_mbenc_curbe_buffer      = NULL;
3419     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3420     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3421     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3422     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3423     surface_param.gpe_context_tx               = tx_gpe_context;
3424     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3425
3426     obj_surface = encode_state->reconstructed_object;
3427     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3428     if (vp9_state->dys_in_use &&
3429         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3430          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3431         obj_surface = vp9_priv_surface->dys_surface_obj;
3432     } else
3433         obj_surface = encode_state->input_yuv_object;
3434
3435     surface_param.curr_frame_obj             = obj_surface;
3436
3437     vme_context->pfn_send_mbenc_surface(ctx,
3438                                         encode_state,
3439                                         gpe_context,
3440                                         encoder_context,
3441                                         &surface_param);
3442
3443     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3444         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3445         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3446     } else {
3447         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3448         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3449     }
3450
3451     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3452     kernel_walker_param.resolution_x = resolution_x;
3453     kernel_walker_param.resolution_y = resolution_y;
3454
3455     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3456         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3457         kernel_walker_param.use_scoreboard = 1;
3458         kernel_walker_param.no_dependency = 0;
3459         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3460     } else {
3461         kernel_walker_param.use_scoreboard = 0;
3462         kernel_walker_param.no_dependency = 1;
3463     }
3464
3465     gen8_gpe_setup_interface_data(ctx, gpe_context);
3466
3467     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3468
3469     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3470                                         gpe_context,
3471                                         media_function,
3472                                         &media_object_walker_param);
3473     return va_status;
3474 }
3475
3476 static void
3477 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3478                           struct i965_gpe_context *gpe_context,
3479                           struct vp9_encoder_kernel_parameter *kernel_param)
3480 {
3481     struct i965_driver_data *i965 = i965_driver_data(ctx);
3482
3483     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3484
3485     gpe_context->sampler.entry_size = 0;
3486     gpe_context->sampler.max_entries = 0;
3487
3488     if (kernel_param->sampler_size) {
3489         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3490         gpe_context->sampler.max_entries = 1;
3491     }
3492
3493     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3494     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3495
3496     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3497     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3498     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3499     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3500
3501     if (i965->intel.eu_total > 0)
3502         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3503     else
3504         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3505
3506     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3507     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3508     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3509                                               gpe_context->vfe_state.curbe_allocation_size -
3510                                               ((gpe_context->idrt.entry_size >> 5) *
3511                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3512     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3513     gpe_context->vfe_state.gpgpu_mode = 0;
3514 }
3515
3516 static void
3517 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3518                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3519 {
3520     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3521     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3522     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3523
3524     if (scoreboard_param->walkpat_flag) {
3525         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3526         gpe_context->vfe_desc5.scoreboard0.type = 1;
3527
3528         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3529         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3530
3531         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3532         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3533
3534         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3535         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3536
3537         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3538         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3539     } else {
3540         // Scoreboard 0
3541         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3542         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3543
3544         // Scoreboard 1
3545         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3546         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3547
3548         // Scoreboard 2
3549         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3550         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3551
3552         // Scoreboard 3
3553         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3554         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3555
3556         // Scoreboard 4
3557         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3558         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3559
3560         // Scoreboard 5
3561         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3562         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3563
3564         // Scoreboard 6
3565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3567
3568         // Scoreboard 7
3569         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3570         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3571     }
3572 }
3573
/*
 * Size threshold used by gen9_encode_vp9_check_parameter(): HME and 16xME are
 * disabled when a 4x-downscaled frame dimension is <= this value, and 16xME
 * is disabled when a 16x-downscaled frame dimension is < this value.
 * NOTE(review): presumably expressed in pixels -- confirm.
 */
#define VP9_VME_REF_WIN       48
3575
3576 static VAStatus
3577 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3578                                 struct encode_state *encode_state,
3579                                 struct intel_encoder_context *encoder_context)
3580 {
3581     struct i965_driver_data *i965 = i965_driver_data(ctx);
3582     struct gen9_vp9_state *vp9_state;
3583     VAEncPictureParameterBufferVP9  *pic_param;
3584     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3585     VAEncSequenceParameterBufferVP9 *seq_param;
3586     struct object_surface *obj_surface;
3587     struct object_buffer *obj_buffer;
3588     struct gen9_surface_vp9 *vp9_priv_surface;
3589
3590     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3591
3592     if (!encode_state->pic_param_ext ||
3593         !encode_state->pic_param_ext->buffer) {
3594         return VA_STATUS_ERROR_INVALID_PARAMETER;
3595     }
3596     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3597
3598     obj_buffer = BUFFER(pic_param->coded_buf);
3599
3600     if (!obj_buffer ||
3601         !obj_buffer->buffer_store ||
3602         !obj_buffer->buffer_store->bo)
3603         return VA_STATUS_ERROR_INVALID_PARAMETER;
3604
3605     encode_state->coded_buf_object = obj_buffer;
3606
3607     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3608
3609     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3610
3611     if (!encode_state->reconstructed_object ||
3612         !encode_state->input_yuv_object)
3613         return VA_STATUS_ERROR_INVALID_PARAMETER;
3614
3615     vp9_state->curr_frame = pic_param->reconstructed_frame;
3616     vp9_state->ref_frame_flag = 0;
3617     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3618         pic_param->pic_flags.bits.intra_only) {
3619         /* this will be regarded as I-frame type */
3620         vp9_state->picture_coding_type = 0;
3621         vp9_state->last_ref_obj = NULL;
3622         vp9_state->golden_ref_obj = NULL;
3623         vp9_state->alt_ref_obj = NULL;
3624     } else {
3625         vp9_state->picture_coding_type = 1;
3626         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3627                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3628
3629         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3630         vp9_state->last_ref_obj = obj_surface;
3631         if (!obj_surface ||
3632             !obj_surface->bo ||
3633             !obj_surface->private_data) {
3634             vp9_state->last_ref_obj = NULL;
3635             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3636         }
3637
3638         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3639         vp9_state->golden_ref_obj = obj_surface;
3640         if (!obj_surface ||
3641             !obj_surface->bo ||
3642             !obj_surface->private_data) {
3643             vp9_state->golden_ref_obj = NULL;
3644             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3645         }
3646
3647         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3648         vp9_state->alt_ref_obj = obj_surface;
3649         if (!obj_surface ||
3650             !obj_surface->bo ||
3651             !obj_surface->private_data) {
3652             vp9_state->alt_ref_obj = NULL;
3653             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3654         }
3655
3656         /* remove the duplicated flag and ref frame list */
3657         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3658             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3659                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3660                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3661                 vp9_state->golden_ref_obj = NULL;
3662             }
3663
3664             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3665                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3666                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3667                 vp9_state->alt_ref_obj = NULL;
3668             }
3669         }
3670
3671         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3672             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3673                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3674                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3675                 vp9_state->alt_ref_obj = NULL;
3676             }
3677         }
3678
3679         if (vp9_state->ref_frame_flag == 0)
3680             return VA_STATUS_ERROR_INVALID_PARAMETER;
3681     }
3682
3683     seg_param = NULL;
3684     if (pic_param->pic_flags.bits.segmentation_enabled) {
3685         if (!encode_state->q_matrix ||
3686             !encode_state->q_matrix->buffer) {
3687             return VA_STATUS_ERROR_INVALID_PARAMETER;
3688         }
3689         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3690                     encode_state->q_matrix->buffer;
3691     }
3692
3693     seq_param = NULL;
3694     if (encode_state->seq_param_ext &&
3695         encode_state->seq_param_ext->buffer)
3696         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3697
3698     if (!seq_param) {
3699         seq_param = &vp9_state->bogus_seq_param;
3700     }
3701
3702     vp9_state->pic_param = pic_param;
3703     vp9_state->segment_param = seg_param;
3704     vp9_state->seq_param = seq_param;
3705
3706     obj_surface = encode_state->reconstructed_object;
3707     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3708         pic_param->frame_height_dst > obj_surface->orig_height)
3709         return VA_STATUS_ERROR_INVALID_SURFACE;
3710
3711     if (!vp9_state->dys_enabled &&
3712         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3713          (pic_param->frame_height_src != pic_param->frame_height_dst)))
3714         return VA_STATUS_ERROR_UNIMPLEMENTED;
3715
3716     if (vp9_state->brc_enabled) {
3717         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3718             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3719
3720             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3721                 !encoder_context->brc.bits_per_second[0])
3722                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3723
3724             vp9_state->gop_size = encoder_context->brc.gop_size;
3725             vp9_state->framerate = encoder_context->brc.framerate[0];
3726
3727             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3728                 !encoder_context->brc.target_percentage[0]) {
3729                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3730                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3731                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3732             } else {
3733                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3734                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3735                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3736                     vp9_state->min_bit_rate = 0;
3737                 else
3738                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3739             }
3740
3741             if (encoder_context->brc.hrd_buffer_size)
3742                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3743             else if (encoder_context->brc.window_size)
3744                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3745             else
3746                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3747             if (encoder_context->brc.hrd_initial_buffer_fullness)
3748                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3749             else
3750                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3751         }
3752     }
3753
3754     vp9_state->frame_width = pic_param->frame_width_dst;
3755     vp9_state->frame_height = pic_param->frame_height_dst;
3756
3757     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3758     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3759
3760     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3761     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3762
3763     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3764     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3765
3766     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3767     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3768     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3769     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3770
3771     vp9_state->dys_in_use = 0;
3772     if (pic_param->frame_width_src != pic_param->frame_width_dst ||
3773         pic_param->frame_height_src != pic_param->frame_height_dst)
3774         vp9_state->dys_in_use = 1;
3775     vp9_state->dys_ref_frame_flag = 0;
3776     /* check the dys setting. The dys is supported by default. */
3777     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3778         !pic_param->pic_flags.bits.intra_only) {
3779         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3780
3781         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3782             vp9_state->last_ref_obj) {
3783             obj_surface = vp9_state->last_ref_obj;
3784             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3785
3786             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3787                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3788                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3789         }
3790         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3791             vp9_state->golden_ref_obj) {
3792             obj_surface = vp9_state->golden_ref_obj;
3793             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3794
3795             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3796                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3797                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3798         }
3799         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3800             vp9_state->alt_ref_obj) {
3801             obj_surface = vp9_state->alt_ref_obj;
3802             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3803
3804             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3805                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3806                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3807         }
3808         if (vp9_state->dys_ref_frame_flag)
3809             vp9_state->dys_in_use = 1;
3810     }
3811
3812     if (vp9_state->hme_supported) {
3813         vp9_state->hme_enabled = 1;
3814     } else {
3815         vp9_state->hme_enabled = 0;
3816     }
3817
3818     if (vp9_state->b16xme_supported) {
3819         vp9_state->b16xme_enabled = 1;
3820     } else {
3821         vp9_state->b16xme_enabled = 0;
3822     }
3823
3824     /* disable HME/16xME if the size is too small */
3825     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3826         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3827         vp9_state->hme_enabled = 0;
3828         vp9_state->b16xme_enabled = 0;
3829     }
3830
3831     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3832         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3833         vp9_state->b16xme_enabled = 0;
3834
3835     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3836         pic_param->pic_flags.bits.intra_only) {
3837         vp9_state->hme_enabled = 0;
3838         vp9_state->b16xme_enabled = 0;
3839     }
3840
3841     vp9_state->mbenc_keyframe_dist_enabled = 0;
3842     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3843         vp9_state->brc_distortion_buffer_supported)
3844         vp9_state->mbenc_keyframe_dist_enabled = 1;
3845
3846     return VA_STATUS_SUCCESS;
3847 }
3848
3849 static VAStatus
3850 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3851                                 struct encode_state *encode_state,
3852                                 struct intel_encoder_context *encoder_context)
3853 {
3854     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3855     struct vp9_surface_param surface_param;
3856     struct gen9_vp9_state *vp9_state;
3857     VAEncPictureParameterBufferVP9  *pic_param;
3858     struct object_surface *obj_surface;
3859     struct gen9_surface_vp9 *vp9_surface;
3860     int driver_header_flag = 0;
3861     VAStatus va_status;
3862
3863     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3864
3865     if (!vp9_state || !vp9_state->pic_param)
3866         return VA_STATUS_ERROR_INVALID_PARAMETER;
3867
3868     pic_param = vp9_state->pic_param;
3869
3870     /* this is to check whether the driver should generate the uncompressed header */
3871     driver_header_flag = 1;
3872     if (encode_state->packed_header_data_ext &&
3873         encode_state->packed_header_data_ext[0] &&
3874         pic_param->bit_offset_first_partition_size) {
3875         VAEncPackedHeaderParameterBuffer *param = NULL;
3876
3877         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3878
3879         if (param->type == VAEncPackedHeaderRawData) {
3880             char *header_data;
3881             unsigned int length_in_bits;
3882
3883             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3884             length_in_bits = param->bit_length;
3885             driver_header_flag = 0;
3886
3887             vp9_state->frame_header.bit_offset_first_partition_size =
3888                 pic_param->bit_offset_first_partition_size;
3889             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3890             vp9_state->alias_insert_data = header_data;
3891
3892             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3893             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3894             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3895             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3896             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3897             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3898         }
3899     }
3900
3901     if (driver_header_flag) {
3902         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3903         intel_write_uncompressed_header(encode_state,
3904                                         VAProfileVP9Profile0,
3905                                         vme_context->frame_header_data,
3906                                         &vp9_state->header_length,
3907                                         &vp9_state->frame_header);
3908         vp9_state->alias_insert_data = vme_context->frame_header_data;
3909     }
3910
3911     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3912                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3913     if (va_status != VA_STATUS_SUCCESS)
3914         return va_status;
3915
3916     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3917                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3918
3919     if (va_status != VA_STATUS_SUCCESS)
3920         return va_status;
3921
3922     surface_param.frame_width = vp9_state->frame_width;
3923     surface_param.frame_height = vp9_state->frame_height;
3924     va_status = gen9_vp9_init_check_surfaces(ctx,
3925                                              encode_state->reconstructed_object,
3926                                              &surface_param);
3927
3928     {
3929         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3930
3931         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3932     }
3933     if (vp9_state->dys_in_use &&
3934         (pic_param->frame_width_src != pic_param->frame_width_dst ||
3935          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3936         surface_param.frame_width = pic_param->frame_width_dst;
3937         surface_param.frame_height = pic_param->frame_height_dst;
3938         va_status = gen9_vp9_check_dys_surfaces(ctx,
3939                                                 encode_state->reconstructed_object,
3940                                                 &surface_param);
3941
3942         if (va_status)
3943             return va_status;
3944     }
3945
3946     if (vp9_state->dys_ref_frame_flag) {
3947         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3948             vp9_state->last_ref_obj) {
3949             obj_surface = vp9_state->last_ref_obj;
3950             surface_param.frame_width = vp9_state->frame_width;
3951             surface_param.frame_height = vp9_state->frame_height;
3952             va_status = gen9_vp9_check_dys_surfaces(ctx,
3953                                                     obj_surface,
3954                                                     &surface_param);
3955
3956             if (va_status)
3957                 return va_status;
3958         }
3959         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3960             vp9_state->golden_ref_obj) {
3961             obj_surface = vp9_state->golden_ref_obj;
3962             surface_param.frame_width = vp9_state->frame_width;
3963             surface_param.frame_height = vp9_state->frame_height;
3964             va_status = gen9_vp9_check_dys_surfaces(ctx,
3965                                                     obj_surface,
3966                                                     &surface_param);
3967
3968             if (va_status)
3969                 return va_status;
3970         }
3971         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3972             vp9_state->alt_ref_obj) {
3973             obj_surface = vp9_state->alt_ref_obj;
3974             surface_param.frame_width = vp9_state->frame_width;
3975             surface_param.frame_height = vp9_state->frame_height;
3976             va_status = gen9_vp9_check_dys_surfaces(ctx,
3977                                                     obj_surface,
3978                                                     &surface_param);
3979
3980             if (va_status)
3981                 return va_status;
3982         }
3983     }
3984
3985     if (va_status != VA_STATUS_SUCCESS)
3986         return va_status;
3987     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
3988
3989     return VA_STATUS_SUCCESS;
3990 }
3991
3992 static VAStatus
3993 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
3994                              struct encode_state *encode_state,
3995                              struct intel_encoder_context *encoder_context)
3996 {
3997     struct i965_driver_data *i965 = i965_driver_data(ctx);
3998     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3999     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4000     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4001     struct gpe_dynamic_state_parameter ds_param;
4002     int i;
4003
4004     /*
4005      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4006      * MBEnc first
4007      */
4008     for (i = 0; i < NUM_VP9_MBENC; i++) {
4009         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4010     }
4011
4012     /*
4013      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4014      * curbe_buffer.
4015      */
4016     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4017                        ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4018     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4019                                                "mbenc_dys",
4020                                                ds_param.bo_size,
4021                                                0x1000);
4022     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4023
4024     ds_param.bo = mbenc_context->mbenc_bo_dys;
4025     ds_param.curbe_offset = 0;
4026     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4027     for (i = 0; i < NUM_VP9_MBENC; i++) {
4028         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4029                                ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4030
4031         gen8_gpe_context_set_dynamic_buffer(ctx,
4032                                             &mbenc_context->gpe_contexts[i],
4033                                             &ds_param);
4034     }
4035
4036     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4037     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4038
4039     return VA_STATUS_SUCCESS;
4040 }
4041
4042 static VAStatus
4043 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4044                               struct encode_state *encode_state,
4045                               struct intel_encoder_context *encoder_context)
4046 {
4047     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4048     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4049
4050     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4051     mbenc_context->mbenc_bo_dys = NULL;
4052
4053     return VA_STATUS_SUCCESS;
4054 }
4055
4056 static VAStatus
4057 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4058                             struct encode_state *encode_state,
4059                             struct intel_encoder_context *encoder_context)
4060 {
4061     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4062     struct gen9_vp9_state *vp9_state;
4063     int i;
4064
4065     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4066
4067     if (!vp9_state || !vp9_state->pic_param)
4068         return VA_STATUS_ERROR_INVALID_PARAMETER;
4069
4070     if (vp9_state->dys_in_use) {
4071         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4072     }
4073
4074     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4075         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4076     }
4077
4078     if (vp9_state->picture_coding_type == KEY_FRAME) {
4079         for (i = 0; i < 2; i++)
4080             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4081     }
4082
4083     if (vp9_state->hme_supported) {
4084         gen9_vp9_scaling_kernel(ctx, encode_state,
4085                                 encoder_context,
4086                                 0);
4087         if (vp9_state->b16xme_supported) {
4088             gen9_vp9_scaling_kernel(ctx, encode_state,
4089                                     encoder_context,
4090                                     1);
4091         }
4092     }
4093
4094     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4095         if (vp9_state->b16xme_enabled)
4096             gen9_vp9_me_kernel(ctx, encode_state,
4097                                encoder_context,
4098                                1);
4099
4100         gen9_vp9_me_kernel(ctx, encode_state,
4101                            encoder_context,
4102                            0);
4103     }
4104
4105     if (vp9_state->brc_enabled) {
4106         if (vp9_state->mbenc_keyframe_dist_enabled)
4107             gen9_vp9_brc_intra_dist_kernel(ctx,
4108                                            encode_state,
4109                                            encoder_context);
4110
4111         gen9_vp9_brc_update_kernel(ctx, encode_state,
4112                                    encoder_context);
4113     }
4114
4115     if (vp9_state->picture_coding_type == KEY_FRAME) {
4116         gen9_vp9_mbenc_kernel(ctx, encode_state,
4117                               encoder_context,
4118                               VP9_MEDIA_STATE_MBENC_I_32x32);
4119         gen9_vp9_mbenc_kernel(ctx, encode_state,
4120                               encoder_context,
4121                               VP9_MEDIA_STATE_MBENC_I_16x16);
4122     } else {
4123         gen9_vp9_mbenc_kernel(ctx, encode_state,
4124                               encoder_context,
4125                               VP9_MEDIA_STATE_MBENC_P);
4126     }
4127
4128     gen9_vp9_mbenc_kernel(ctx, encode_state,
4129                           encoder_context,
4130                           VP9_MEDIA_STATE_MBENC_TX);
4131
4132     vp9_state->curr_mode_decision_index ^= 1;
4133     if (vp9_state->brc_enabled) {
4134         vp9_state->brc_inited = 1;
4135         vp9_state->brc_reset = 0;
4136     }
4137
4138     return VA_STATUS_SUCCESS;
4139 }
4140
4141 static VAStatus
4142 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4143                       VAProfile profile,
4144                       struct encode_state *encode_state,
4145                       struct intel_encoder_context *encoder_context)
4146 {
4147     VAStatus va_status;
4148     struct gen9_vp9_state *vp9_state;
4149
4150     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4151
4152     if (!vp9_state)
4153         return VA_STATUS_ERROR_INVALID_CONTEXT;
4154
4155     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4156     if (va_status != VA_STATUS_SUCCESS)
4157         return va_status;
4158
4159     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4160                                             encoder_context,
4161                                             !vp9_state->brc_allocated);
4162
4163     if (va_status != VA_STATUS_SUCCESS)
4164         return va_status;
4165     vp9_state->brc_allocated = 1;
4166
4167     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4168
4169     if (va_status != VA_STATUS_SUCCESS)
4170         return va_status;
4171
4172     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4173     if (va_status != VA_STATUS_SUCCESS)
4174         return va_status;
4175
4176     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4177     if (va_status != VA_STATUS_SUCCESS)
4178         return va_status;
4179
4180     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4181
4182     return VA_STATUS_SUCCESS;
4183 }
4184
4185 static void
4186 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4187 {
4188     int i;
4189
4190     for (i = 0; i < NUM_VP9_BRC; i++)
4191         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4192 }
4193
4194 static void
4195 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4196 {
4197     int i;
4198
4199     for (i = 0; i < NUM_VP9_SCALING; i++)
4200         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4201 }
4202
4203 static void
4204 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4205 {
4206     gen8_gpe_context_destroy(&me_context->gpe_context);
4207 }
4208
4209 static void
4210 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4211 {
4212     int i;
4213
4214     for (i = 0; i < NUM_VP9_MBENC; i++)
4215         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4216     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4217     mbenc_context->mbenc_bo_size = 0;
4218 }
4219
4220 static void
4221 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4222 {
4223     gen8_gpe_context_destroy(&dys_context->gpe_context);
4224 }
4225
4226 static void
4227 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4228 {
4229     gen9_vp9_free_resources(vme_context);
4230     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4231     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4232     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4233     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4234     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4235
4236     return;
4237 }
4238
/*
 * Destroy a VP9 encoder context: tear down its kernel contexts and free
 * the structure itself.  A NULL context is ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vp9_context = context;

    if (vp9_context) {
        gen9_vme_kernel_context_destroy_vp9(vp9_context);
        free(vp9_context);
    }
}
4253
4254 static void
4255 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4256                                   struct gen9_encoder_context_vp9 *vme_context,
4257                                   struct vp9_scaling_context *scaling_context)
4258 {
4259     struct i965_gpe_context *gpe_context = NULL;
4260     struct vp9_encoder_kernel_parameter kernel_param;
4261     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4262     struct i965_kernel scale_kernel;
4263
4264     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4265     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4266     kernel_param.sampler_size = 0;
4267
4268     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4269     scoreboard_param.mask = 0xFF;
4270     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4271     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4272     scoreboard_param.walkpat_flag = 0;
4273
4274     gpe_context = &scaling_context->gpe_contexts[0];
4275     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4276     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4277
4278     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4279     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4280     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4281         VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4282
4283     memset(&scale_kernel, 0, sizeof(scale_kernel));
4284
4285     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4286                                          sizeof(media_vp9_kernels),
4287                                          INTEL_VP9_ENC_SCALING4X,
4288                                          0,
4289                                          &scale_kernel);
4290
4291     gen8_gpe_load_kernels(ctx,
4292                           gpe_context,
4293                           &scale_kernel,
4294                           1);
4295
4296     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4297     kernel_param.inline_data_size = 0;
4298     kernel_param.sampler_size = 0;
4299
4300     gpe_context = &scaling_context->gpe_contexts[1];
4301     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4302     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4303
4304     memset(&scale_kernel, 0, sizeof(scale_kernel));
4305
4306     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4307                                          sizeof(media_vp9_kernels),
4308                                          INTEL_VP9_ENC_SCALING2X,
4309                                          0,
4310                                          &scale_kernel);
4311
4312     gen8_gpe_load_kernels(ctx,
4313                           gpe_context,
4314                           &scale_kernel,
4315                           1);
4316
4317     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4318     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4319     return;
4320 }
4321
4322 static void
4323 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4324                              struct gen9_encoder_context_vp9 *vme_context,
4325                              struct vp9_me_context *me_context)
4326 {
4327     struct i965_gpe_context *gpe_context = NULL;
4328     struct vp9_encoder_kernel_parameter kernel_param;
4329     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4330     struct i965_kernel scale_kernel;
4331
4332     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4333     kernel_param.inline_data_size = 0;
4334     kernel_param.sampler_size = 0;
4335
4336     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4337     scoreboard_param.mask = 0xFF;
4338     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4339     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4340     scoreboard_param.walkpat_flag = 0;
4341
4342     gpe_context = &me_context->gpe_context;
4343     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4344     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4345
4346     memset(&scale_kernel, 0, sizeof(scale_kernel));
4347
4348     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4349                                          sizeof(media_vp9_kernels),
4350                                          INTEL_VP9_ENC_ME,
4351                                          0,
4352                                          &scale_kernel);
4353
4354     gen8_gpe_load_kernels(ctx,
4355                           gpe_context,
4356                           &scale_kernel,
4357                           1);
4358
4359     return;
4360 }
4361
4362 static void
4363 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4364                                 struct gen9_encoder_context_vp9 *vme_context,
4365                                 struct vp9_mbenc_context *mbenc_context)
4366 {
4367     struct i965_gpe_context *gpe_context = NULL;
4368     struct vp9_encoder_kernel_parameter kernel_param;
4369     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4370     int i;
4371     struct i965_kernel scale_kernel;
4372
4373     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4374     kernel_param.inline_data_size = 0;
4375     kernel_param.sampler_size = 0;
4376
4377     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4378     scoreboard_param.mask = 0xFF;
4379     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4380     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4381
4382     for (i = 0; i < NUM_VP9_MBENC; i++) {
4383         gpe_context = &mbenc_context->gpe_contexts[i];
4384
4385         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4386             (i == VP9_MBENC_IDX_INTER)) {
4387             scoreboard_param.walkpat_flag = 1;
4388         } else
4389             scoreboard_param.walkpat_flag = 0;
4390
4391         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4392         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4393
4394         memset(&scale_kernel, 0, sizeof(scale_kernel));
4395
4396         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4397                                              sizeof(media_vp9_kernels),
4398                                              INTEL_VP9_ENC_MBENC,
4399                                              i,
4400                                              &scale_kernel);
4401
4402         gen8_gpe_load_kernels(ctx,
4403                               gpe_context,
4404                               &scale_kernel,
4405                               1);
4406     }
4407 }
4408
4409 static void
4410 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4411                               struct gen9_encoder_context_vp9 *vme_context,
4412                               struct vp9_brc_context *brc_context)
4413 {
4414     struct i965_gpe_context *gpe_context = NULL;
4415     struct vp9_encoder_kernel_parameter kernel_param;
4416     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4417     int i;
4418     struct i965_kernel scale_kernel;
4419
4420     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4421     kernel_param.inline_data_size = 0;
4422     kernel_param.sampler_size = 0;
4423
4424     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4425     scoreboard_param.mask = 0xFF;
4426     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4427     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4428
4429     for (i = 0; i < NUM_VP9_BRC; i++) {
4430         gpe_context = &brc_context->gpe_contexts[i];
4431         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4432         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4433
4434         memset(&scale_kernel, 0, sizeof(scale_kernel));
4435
4436         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4437                                              sizeof(media_vp9_kernels),
4438                                              INTEL_VP9_ENC_BRC,
4439                                              i,
4440                                              &scale_kernel);
4441
4442         gen8_gpe_load_kernels(ctx,
4443                               gpe_context,
4444                               &scale_kernel,
4445                               1);
4446     }
4447 }
4448
4449 static void
4450 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4451                               struct gen9_encoder_context_vp9 *vme_context,
4452                               struct vp9_dys_context *dys_context)
4453 {
4454     struct i965_gpe_context *gpe_context = NULL;
4455     struct vp9_encoder_kernel_parameter kernel_param;
4456     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4457     struct i965_kernel scale_kernel;
4458
4459     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4460     kernel_param.inline_data_size = 0;
4461     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4462
4463     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4464     scoreboard_param.mask = 0xFF;
4465     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4466     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4467     scoreboard_param.walkpat_flag = 0;
4468
4469     gpe_context = &dys_context->gpe_context;
4470     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4471     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4472
4473     memset(&scale_kernel, 0, sizeof(scale_kernel));
4474
4475     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4476                                          sizeof(media_vp9_kernels),
4477                                          INTEL_VP9_ENC_DYS,
4478                                          0,
4479                                          &scale_kernel);
4480
4481     gen8_gpe_load_kernels(ctx,
4482                           gpe_context,
4483                           &scale_kernel,
4484                           1);
4485
4486     return;
4487 }
4488
4489 static Bool
4490 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4491                                   struct intel_encoder_context *encoder_context,
4492                                   struct gen9_encoder_context_vp9 *vme_context)
4493 {
4494     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4495     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4496     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4497     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4498     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4499
4500     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4501     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4502     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4503     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4504
4505     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4506
4507     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4508     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4509     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4510     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4511     return true;
4512 }
4513
4514 static
4515 void gen9_vp9_write_compressed_element(char *buffer,
4516                                        int index,
4517                                        int prob,
4518                                        bool value)
4519 {
4520     struct vp9_compressed_element *base_element, *vp9_element;
4521     base_element = (struct vp9_compressed_element *)buffer;
4522
4523     vp9_element = base_element + (index >> 1);
4524     if (index % 2) {
4525         vp9_element->b_valid = 1;
4526         vp9_element->b_probdiff_select = 1;
4527         vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4528         vp9_element->b_bin = value;
4529     } else {
4530         vp9_element->a_valid = 1;
4531         vp9_element->a_probdiff_select = 1;
4532         vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4533         vp9_element->a_bin = value;
4534     }
4535 }
4536
4537 static void
4538 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4539                                             struct intel_encoder_context *encoder_context)
4540 {
4541     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4542     VAEncPictureParameterBufferVP9 *pic_param;
4543     struct gen9_vp9_state *vp9_state;
4544     char *buffer;
4545     int i;
4546
4547     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4548
4549     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4550         return;
4551
4552     pic_param = vp9_state->pic_param;
4553     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4554         (pic_param->pic_flags.bits.intra_only) ||
4555         pic_param->pic_flags.bits.error_resilient_mode) {
4556         /* reset current frame_context */
4557         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4558         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4559             pic_param->pic_flags.bits.error_resilient_mode ||
4560             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4561             for (i = 0; i < 4; i++)
4562                 memcpy(&vp9_state->vp9_frame_ctx[i],
4563                        &vp9_state->vp9_current_fc,
4564                        sizeof(FRAME_CONTEXT));
4565         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4566             i = pic_param->pic_flags.bits.frame_context_idx;
4567             memcpy(&vp9_state->vp9_frame_ctx[i],
4568                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4569         }
4570         /* reset the frame_ctx_idx = 0 */
4571         vp9_state->frame_ctx_idx = 0;
4572     } else {
4573         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4574     }
4575
4576     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4577     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4578
4579     if (!buffer)
4580         return;
4581
4582     /* write tx_size */
4583     if ((pic_param->luma_ac_qindex == 0) &&
4584         (pic_param->luma_dc_qindex_delta == 0) &&
4585         (pic_param->chroma_ac_qindex_delta == 0) &&
4586         (pic_param->chroma_dc_qindex_delta == 0)) {
4587         /* lossless flag */
4588         /* nothing is needed */
4589         gen9_vp9_write_compressed_element(buffer,
4590                                           0, 128, 0);
4591         gen9_vp9_write_compressed_element(buffer,
4592                                           1, 128, 0);
4593         gen9_vp9_write_compressed_element(buffer,
4594                                           2, 128, 0);
4595     } else {
4596         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4597             gen9_vp9_write_compressed_element(buffer,
4598                                               0, 128, 1);
4599             gen9_vp9_write_compressed_element(buffer,
4600                                               1, 128, 1);
4601             gen9_vp9_write_compressed_element(buffer,
4602                                               2, 128, 1);
4603         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4604             gen9_vp9_write_compressed_element(buffer,
4605                                               0, 128, 1);
4606             gen9_vp9_write_compressed_element(buffer,
4607                                               1, 128, 1);
4608             gen9_vp9_write_compressed_element(buffer,
4609                                               2, 128, 0);
4610         } else {
4611             unsigned int tx_mode;
4612
4613             tx_mode = vp9_state->tx_mode;
4614             gen9_vp9_write_compressed_element(buffer,
4615                                               0, 128, ((tx_mode) & 2));
4616             gen9_vp9_write_compressed_element(buffer,
4617                                               1, 128, ((tx_mode) & 1));
4618             gen9_vp9_write_compressed_element(buffer,
4619                                               2, 128, 0);
4620         }
4621
4622         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4623
4624             gen9_vp9_write_compressed_element(buffer,
4625                                               3, 128, 0);
4626
4627             gen9_vp9_write_compressed_element(buffer,
4628                                               7, 128, 0);
4629
4630             gen9_vp9_write_compressed_element(buffer,
4631                                               15, 128, 0);
4632         }
4633     }
4634     /*Setup all the input&output object*/
4635
4636     {
4637         /* update the coeff_update flag */
4638         gen9_vp9_write_compressed_element(buffer,
4639                                           27, 128, 0);
4640         gen9_vp9_write_compressed_element(buffer,
4641                                           820, 128, 0);
4642         gen9_vp9_write_compressed_element(buffer,
4643                                           1613, 128, 0);
4644         gen9_vp9_write_compressed_element(buffer,
4645                                           2406, 128, 0);
4646     }
4647
4648
4649     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4650         bool allow_comp = !(
4651                               (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4652                               (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4653                           );
4654
4655         if (allow_comp) {
4656             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4657                 gen9_vp9_write_compressed_element(buffer,
4658                                                   3271, 128, 1);
4659                 gen9_vp9_write_compressed_element(buffer,
4660                                                   3272, 128, 1);
4661             } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4662                 gen9_vp9_write_compressed_element(buffer,
4663                                                   3271, 128, 1);
4664                 gen9_vp9_write_compressed_element(buffer,
4665                                                   3272, 128, 0);
4666             } else {
4667
4668                 gen9_vp9_write_compressed_element(buffer,
4669                                                   3271, 128, 0);
4670                 gen9_vp9_write_compressed_element(buffer,
4671                                                   3272, 128, 0);
4672             }
4673         }
4674     }
4675
4676     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4677 }
4678
4679
4680 static void
4681 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4682                               struct encode_state *encode_state,
4683                               struct intel_encoder_context *encoder_context,
4684                               struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4685 {
4686     struct intel_batchbuffer *batch = encoder_context->base.batch;
4687
4688     BEGIN_BCS_BATCH(batch, 6);
4689
4690     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4691     OUT_BCS_BATCH(batch,
4692                   (pipe_mode_param->stream_out << 12) |
4693                   (pipe_mode_param->codec_mode << 5) |
4694                   (0 << 3) | /* disable Pic Status / Error Report */
4695                   (pipe_mode_param->stream_out << 2) |
4696                   HCP_CODEC_SELECT_ENCODE);
4697     OUT_BCS_BATCH(batch, 0);
4698     OUT_BCS_BATCH(batch, 0);
4699     OUT_BCS_BATCH(batch, (1 << 6));
4700     OUT_BCS_BATCH(batch, 0);
4701
4702     ADVANCE_BCS_BATCH(batch);
4703 }
4704
4705 static void
4706 gen9_vp9_add_surface_state(VADriverContextP ctx,
4707                            struct encode_state *encode_state,
4708                            struct intel_encoder_context *encoder_context,
4709                            hcp_surface_state *hcp_state)
4710 {
4711     struct intel_batchbuffer *batch = encoder_context->base.batch;
4712     if (!hcp_state)
4713         return;
4714
4715     BEGIN_BCS_BATCH(batch, 3);
4716     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4717     OUT_BCS_BATCH(batch,
4718                   (hcp_state->dw1.surface_id << 28) |
4719                   (hcp_state->dw1.surface_pitch - 1)
4720                  );
4721     OUT_BCS_BATCH(batch,
4722                   (hcp_state->dw2.surface_format << 28) |
4723                   (hcp_state->dw2.y_cb_offset)
4724                  );
4725     ADVANCE_BCS_BATCH(batch);
4726 }
4727
4728 static void
4729 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4730                                  struct encode_state *encode_state,
4731                                  struct intel_encoder_context *encoder_context)
4732 {
4733     struct i965_driver_data *i965 = i965_driver_data(ctx);
4734     struct intel_batchbuffer *batch = encoder_context->base.batch;
4735     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4736     struct gen9_vp9_state *vp9_state;
4737     unsigned int i;
4738     struct object_surface *obj_surface;
4739
4740     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4741
4742     if (!vp9_state || !vp9_state->pic_param)
4743         return;
4744
4745
4746     BEGIN_BCS_BATCH(batch, 104);
4747
4748     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4749
4750     obj_surface = encode_state->reconstructed_object;
4751
4752     /* reconstructed obj_surface is already checked. So this is skipped */
4753     /* DW 1..3 decoded surface */
4754     OUT_RELOC64(batch,
4755                 obj_surface->bo,
4756                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4757                 0);
4758     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4759
4760     /* DW 4..6 deblocking line */
4761     OUT_RELOC64(batch,
4762                 pak_context->res_deblocking_filter_line_buffer.bo,
4763                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4764                 0);
4765     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4766
4767     /* DW 7..9 deblocking tile line */
4768     OUT_RELOC64(batch,
4769                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4770                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4771                 0);
4772     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4773
4774     /* DW 10..12 deblocking tile col */
4775     OUT_RELOC64(batch,
4776                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4777                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4778                 0);
4779     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4780
4781     /* DW 13..15 metadata line */
4782     OUT_RELOC64(batch,
4783                 pak_context->res_metadata_line_buffer.bo,
4784                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4785                 0);
4786     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4787
4788     /* DW 16..18 metadata tile line */
4789     OUT_RELOC64(batch,
4790                 pak_context->res_metadata_tile_line_buffer.bo,
4791                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4792                 0);
4793     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4794
4795     /* DW 19..21 metadata tile col */
4796     OUT_RELOC64(batch,
4797                 pak_context->res_metadata_tile_col_buffer.bo,
4798                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4799                 0);
4800     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4801
4802     /* DW 22..30 SAO is not used for VP9 */
4803     OUT_BCS_BATCH(batch, 0);
4804     OUT_BCS_BATCH(batch, 0);
4805     OUT_BCS_BATCH(batch, 0);
4806     OUT_BCS_BATCH(batch, 0);
4807     OUT_BCS_BATCH(batch, 0);
4808     OUT_BCS_BATCH(batch, 0);
4809     OUT_BCS_BATCH(batch, 0);
4810     OUT_BCS_BATCH(batch, 0);
4811     OUT_BCS_BATCH(batch, 0);
4812
4813     /* DW 31..33 Current Motion vector temporal buffer */
4814     OUT_RELOC64(batch,
4815                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4816                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4817                 0);
4818     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4819
4820     /* DW 34..36 Not used */
4821     OUT_BCS_BATCH(batch, 0);
4822     OUT_BCS_BATCH(batch, 0);
4823     OUT_BCS_BATCH(batch, 0);
4824
4825     /* Only the first three reference_frame is used for VP9 */
4826     /* DW 37..52 for reference_frame */
4827     i = 0;
4828     if (vp9_state->picture_coding_type) {
4829         for (i = 0; i < 3; i++) {
4830
4831             if (pak_context->reference_surfaces[i].bo) {
4832                 OUT_RELOC64(batch,
4833                             pak_context->reference_surfaces[i].bo,
4834                             I915_GEM_DOMAIN_INSTRUCTION, 0,
4835                             0);
4836             } else {
4837                 OUT_BCS_BATCH(batch, 0);
4838                 OUT_BCS_BATCH(batch, 0);
4839             }
4840         }
4841     }
4842
4843     for (; i < 8; i++) {
4844         OUT_BCS_BATCH(batch, 0);
4845         OUT_BCS_BATCH(batch, 0);
4846     }
4847
4848     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4849
4850     /* DW 54..56 for source input */
4851     OUT_RELOC64(batch,
4852                 pak_context->uncompressed_picture_source.bo,
4853                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4854                 0);
4855     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4856
4857     /* DW 57..59 StreamOut is not used */
4858     OUT_BCS_BATCH(batch, 0);
4859     OUT_BCS_BATCH(batch, 0);
4860     OUT_BCS_BATCH(batch, 0);
4861
4862     /* DW 60..62. Not used for encoder */
4863     OUT_BCS_BATCH(batch, 0);
4864     OUT_BCS_BATCH(batch, 0);
4865     OUT_BCS_BATCH(batch, 0);
4866
4867     /* DW 63..65. ILDB Not used for encoder */
4868     OUT_BCS_BATCH(batch, 0);
4869     OUT_BCS_BATCH(batch, 0);
4870     OUT_BCS_BATCH(batch, 0);
4871
4872     /* DW 66..81 For the collocated motion vector temporal buffer */
4873     if (vp9_state->picture_coding_type) {
4874         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4875         OUT_RELOC64(batch,
4876                     pak_context->res_mv_temporal_buffer[prev_index].bo,
4877                     I915_GEM_DOMAIN_INSTRUCTION, 0,
4878                     0);
4879     } else {
4880         OUT_BCS_BATCH(batch, 0);
4881         OUT_BCS_BATCH(batch, 0);
4882     }
4883
4884     for (i = 1; i < 8; i++) {
4885         OUT_BCS_BATCH(batch, 0);
4886         OUT_BCS_BATCH(batch, 0);
4887     }
4888     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4889
4890     /* DW 83..85 VP9 prob buffer */
4891     OUT_RELOC64(batch,
4892                 pak_context->res_prob_buffer.bo,
4893                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4894                 0);
4895
4896     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4897
4898     /* DW 86..88 Segment id buffer */
4899     if (pak_context->res_segmentid_buffer.bo) {
4900         OUT_RELOC64(batch,
4901                     pak_context->res_segmentid_buffer.bo,
4902                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4903                     0);
4904     } else {
4905         OUT_BCS_BATCH(batch, 0);
4906         OUT_BCS_BATCH(batch, 0);
4907     }
4908     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4909
4910     /* DW 89..91 HVD line rowstore buffer */
4911     OUT_RELOC64(batch,
4912                 pak_context->res_hvd_line_buffer.bo,
4913                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4914                 0);
4915     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4916
4917     /* DW 92..94 HVD tile line rowstore buffer */
4918     OUT_RELOC64(batch,
4919                 pak_context->res_hvd_tile_line_buffer.bo,
4920                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4921                 0);
4922     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4923
4924     /* DW 95..97 SAO streamout. Not used for VP9 */
4925     OUT_BCS_BATCH(batch, 0);
4926     OUT_BCS_BATCH(batch, 0);
4927     OUT_BCS_BATCH(batch, 0);
4928
4929     /* reserved for KBL. 98..100 */
4930     OUT_BCS_BATCH(batch, 0);
4931     OUT_BCS_BATCH(batch, 0);
4932     OUT_BCS_BATCH(batch, 0);
4933
4934     /* 101..103 */
4935     OUT_BCS_BATCH(batch, 0);
4936     OUT_BCS_BATCH(batch, 0);
4937     OUT_BCS_BATCH(batch, 0);
4938
4939     ADVANCE_BCS_BATCH(batch);
4940 }
4941
4942 static void
4943 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4944                                      struct encode_state *encode_state,
4945                                      struct intel_encoder_context *encoder_context)
4946 {
4947     struct i965_driver_data *i965 = i965_driver_data(ctx);
4948     struct intel_batchbuffer *batch = encoder_context->base.batch;
4949     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4950     struct gen9_vp9_state *vp9_state;
4951
4952     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4953
4954     /* to do */
4955     BEGIN_BCS_BATCH(batch, 29);
4956
4957     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4958
4959     /* indirect bitstream object base */
4960     OUT_BCS_BATCH(batch, 0);
4961     OUT_BCS_BATCH(batch, 0);
4962     OUT_BCS_BATCH(batch, 0);
4963     /* the upper bound of indirect bitstream object */
4964     OUT_BCS_BATCH(batch, 0);
4965     OUT_BCS_BATCH(batch, 0);
4966
4967     /* DW 6: Indirect CU object base address */
4968     OUT_RELOC64(batch,
4969                 pak_context->res_mb_code_surface.bo,
4970                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
4971                 vp9_state->mb_data_offset);
4972     /* default attribute */
4973     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4974
4975     /* DW 9..11, PAK-BSE */
4976     OUT_RELOC64(batch,
4977                 pak_context->indirect_pak_bse_object.bo,
4978                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4979                 pak_context->indirect_pak_bse_object.offset);
4980     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4981
4982     /* DW 12..13 upper bound */
4983     OUT_RELOC64(batch,
4984                 pak_context->indirect_pak_bse_object.bo,
4985                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4986                 pak_context->indirect_pak_bse_object.end_offset);
4987
4988     /* DW 14..16 compressed header buffer */
4989     OUT_RELOC64(batch,
4990                 pak_context->res_compressed_input_buffer.bo,
4991                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4992                 0);
4993     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4994
4995     /* DW 17..19 prob counter streamout */
4996     OUT_RELOC64(batch,
4997                 pak_context->res_prob_counter_buffer.bo,
4998                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4999                 0);
5000     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5001
5002     /* DW 20..22 prob delta streamin */
5003     OUT_RELOC64(batch,
5004                 pak_context->res_prob_delta_buffer.bo,
5005                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5006                 0);
5007     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5008
5009     /* DW 23..25 Tile record streamout */
5010     OUT_RELOC64(batch,
5011                 pak_context->res_tile_record_streamout_buffer.bo,
5012                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5013                 0);
5014     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5015
5016     /* DW 26..28 CU record streamout */
5017     OUT_RELOC64(batch,
5018                 pak_context->res_cu_stat_streamout_buffer.bo,
5019                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5020                 0);
5021     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5022
5023     ADVANCE_BCS_BATCH(batch);
5024 }
5025
5026 static void
5027 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5028                            struct encode_state *encode_state,
5029                            struct intel_encoder_context *encoder_context,
5030                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5031 {
5032     struct intel_batchbuffer *batch = encoder_context->base.batch;
5033     uint32_t batch_value, tmp;
5034     VAEncPictureParameterBufferVP9 *pic_param;
5035
5036     if (!encode_state->pic_param_ext ||
5037         !encode_state->pic_param_ext->buffer) {
5038         return;
5039     }
5040
5041     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5042
5043     batch_value = seg_param->seg_flags.bits.segment_reference;
5044     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5045         pic_param->pic_flags.bits.intra_only)
5046         batch_value = 0;
5047
5048     BEGIN_BCS_BATCH(batch, 8);
5049
5050     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5051     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5052     OUT_BCS_BATCH(batch,
5053                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5054                   (batch_value << 1) |
5055                   (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5056                  );
5057
5058     /* DW 3..6 is not used for encoder */
5059     OUT_BCS_BATCH(batch, 0);
5060     OUT_BCS_BATCH(batch, 0);
5061     OUT_BCS_BATCH(batch, 0);
5062     OUT_BCS_BATCH(batch, 0);
5063
5064     /* DW 7 Mode */
5065     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5066     batch_value = tmp;
5067     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5068     batch_value |= (tmp << 16);
5069     OUT_BCS_BATCH(batch, batch_value);
5070
5071     ADVANCE_BCS_BATCH(batch);
5072
5073 }
5074
5075 static void
5076 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5077                                                  struct intel_encoder_context *encoder_context,
5078                                                  struct i965_gpe_resource *obj_batch_buffer)
5079 {
5080     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5081     struct gen9_vp9_state *vp9_state;
5082     int uncompressed_header_length;
5083     unsigned int *cmd_ptr;
5084     unsigned int dw_length, bits_in_last_dw;
5085
5086     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5087
5088     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5089         return;
5090
5091     uncompressed_header_length = vp9_state->header_length;
5092     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5093
5094     if (!cmd_ptr)
5095         return;
5096
5097     bits_in_last_dw = uncompressed_header_length % 4;
5098     bits_in_last_dw *= 8;
5099
5100     if (bits_in_last_dw == 0)
5101         bits_in_last_dw = 32;
5102
5103     /* get the DWORD length of the inserted_data */
5104     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5105     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5106
5107     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5108                   (0 << 16) | /* the start offset in first DW */
5109                   (0 << 15) |
5110                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5111                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5112                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5113                   (1 << 2) |  /* last header flag. */
5114                   (0 << 1));
5115     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5116
5117     cmd_ptr += dw_length;
5118
5119     *cmd_ptr++ = MI_NOOP;
5120     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5121     i965_unmap_gpe_resource(obj_batch_buffer);
5122 }
5123
5124 static void
5125 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5126                            struct encode_state *encode_state,
5127                            struct intel_encoder_context *encoder_context)
5128 {
5129     struct intel_batchbuffer *batch = encoder_context->base.batch;
5130     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5131     struct object_surface *obj_surface;
5132     VAEncPictureParameterBufferVP9 *pic_param;
5133     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5134     struct gen9_vp9_state *vp9_state;
5135     struct gen9_surface_vp9 *vp9_priv_surface;
5136     int i;
5137     struct gen9_hcpe_pipe_mode_select_param mode_param;
5138     hcp_surface_state hcp_surface;
5139     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5140     int segment_count;
5141
5142     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5143
5144     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5145         return;
5146
5147     pic_param = vp9_state->pic_param;
5148     seg_param = vp9_state->segment_param;
5149
5150     if (vp9_state->curr_pak_pass == 0) {
5151         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5152                                                          &pak_context->res_pak_uncompressed_input_buffer);
5153
5154         // Check if driver already programmed pic state as part of BRC update kernel programming.
5155         if (!vp9_state->brc_enabled) {
5156             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5157                                                      encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5158         }
5159     }
5160
5161     if (vp9_state->curr_pak_pass == 0) {
5162         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5163     }
5164
5165     {
5166         /* copy the frame_context[frame_idx] into curr_frame_context */
5167         memcpy(&vp9_state->vp9_current_fc,
5168                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5169                sizeof(FRAME_CONTEXT));
5170         {
5171             uint8_t *prob_ptr;
5172
5173             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5174
5175             if (!prob_ptr)
5176                 return;
5177
5178             /* copy the current fc to vp9_prob buffer */
5179             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5180             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5181                 pic_param->pic_flags.bits.intra_only) {
5182                 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5183
5184                 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5185                        sizeof(vp9_kf_partition_probs));
5186                 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5187                        sizeof(vp9_kf_uv_mode_prob));
5188             }
5189             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5190         }
5191     }
5192
5193     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5194         /* read image status and insert the conditional end cmd */
5195         /* image ctrl/status is already accessed */
5196         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5197         struct vp9_encode_status_buffer_internal *status_buffer;
5198
5199         status_buffer = &vp9_state->status_buffer;
5200         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5201         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5202         mi_cond_end.bo = status_buffer->bo;
5203         mi_cond_end.compare_data = 0;
5204         mi_cond_end.compare_mask_mode_disabled = 1;
5205         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5206                                                  &mi_cond_end);
5207     }
5208
5209     mode_param.codec_mode = 1;
5210     mode_param.stream_out = 0;
5211     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5212
5213     /* reconstructed surface */
5214     memset(&hcp_surface, 0, sizeof(hcp_surface));
5215     obj_surface = encode_state->reconstructed_object;
5216     hcp_surface.dw1.surface_id = 0;
5217     hcp_surface.dw1.surface_pitch = obj_surface->width;
5218     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5219     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5220     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5221                                &hcp_surface);
5222
5223     /* Input surface */
5224     if (vp9_state->dys_in_use &&
5225         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5226          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5227         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5228         obj_surface = vp9_priv_surface->dys_surface_obj;
5229     } else {
5230         obj_surface = encode_state->input_yuv_object;
5231     }
5232
5233     hcp_surface.dw1.surface_id = 1;
5234     hcp_surface.dw1.surface_pitch = obj_surface->width;
5235     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5236     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5237     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5238                                &hcp_surface);
5239
5240     if (vp9_state->picture_coding_type) {
5241         /* Add surface for last */
5242         if (vp9_state->last_ref_obj) {
5243             obj_surface = vp9_state->last_ref_obj;
5244             hcp_surface.dw1.surface_id = 2;
5245             hcp_surface.dw1.surface_pitch = obj_surface->width;
5246             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5247             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5248             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5249                                        &hcp_surface);
5250         }
5251         if (vp9_state->golden_ref_obj) {
5252             obj_surface = vp9_state->golden_ref_obj;
5253             hcp_surface.dw1.surface_id = 3;
5254             hcp_surface.dw1.surface_pitch = obj_surface->width;
5255             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5256             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5257             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5258                                        &hcp_surface);
5259         }
5260         if (vp9_state->alt_ref_obj) {
5261             obj_surface = vp9_state->alt_ref_obj;
5262             hcp_surface.dw1.surface_id = 4;
5263             hcp_surface.dw1.surface_pitch = obj_surface->width;
5264             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5265             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5266             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5267                                        &hcp_surface);
5268         }
5269     }
5270
5271     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5272
5273     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5274
5275     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5276     memset(&second_level_batch, 0, sizeof(second_level_batch));
5277
5278     if (vp9_state->curr_pak_pass == 0) {
5279         second_level_batch.offset = 0;
5280     } else
5281         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5282
5283     second_level_batch.is_second_level = 1;
5284     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5285
5286     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5287
5288     if (pic_param->pic_flags.bits.segmentation_enabled &&
5289         seg_param)
5290         segment_count = 8;
5291     else {
5292         segment_count = 1;
5293         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5294         seg_param = &tmp_seg_param;
5295     }
5296     for (i = 0; i < segment_count; i++) {
5297         gen9_pak_vp9_segment_state(ctx, encode_state,
5298                                    encoder_context,
5299                                    &seg_param->seg_data[i], i);
5300     }
5301
5302     /* Insert the uncompressed header buffer */
5303     second_level_batch.is_second_level = 1;
5304     second_level_batch.offset = 0;
5305     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5306
5307     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5308
5309     /* PAK_OBJECT */
5310     second_level_batch.is_second_level = 1;
5311     second_level_batch.offset = 0;
5312     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5313     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5314
5315     return;
5316 }
5317
5318 static void
5319 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5320 {
5321     struct intel_batchbuffer *batch = encoder_context->base.batch;
5322     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5323     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5324     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5325     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5326     struct vp9_encode_status_buffer_internal *status_buffer;
5327     struct gen9_vp9_state *vp9_state;
5328
5329     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5330     if (!vp9_state || !pak_context || !batch)
5331         return;
5332
5333     status_buffer = &(vp9_state->status_buffer);
5334
5335     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5336     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5337
5338     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5339     mi_store_reg_mem_param.bo = status_buffer->bo;
5340     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5341     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5342     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5343
5344     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5345     mi_store_reg_mem_param.offset = 0;
5346     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5347     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5348
5349     /* Read HCP Image status */
5350     mi_store_reg_mem_param.bo = status_buffer->bo;
5351     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5352     mi_store_reg_mem_param.mmio_offset =
5353         status_buffer->vp9_image_mask_reg_offset;
5354     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5355
5356     mi_store_reg_mem_param.bo = status_buffer->bo;
5357     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5358     mi_store_reg_mem_param.mmio_offset =
5359         status_buffer->vp9_image_ctrl_reg_offset;
5360     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5361
5362     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5363     mi_store_reg_mem_param.offset = 4;
5364     mi_store_reg_mem_param.mmio_offset =
5365         status_buffer->vp9_image_ctrl_reg_offset;
5366     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5367
5368     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5369
5370     return;
5371 }
5372
5373 static VAStatus
5374 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5375                               struct encode_state *encode_state,
5376                               struct intel_encoder_context *encoder_context)
5377 {
5378     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5379     struct object_surface *obj_surface;
5380     struct object_buffer *obj_buffer;
5381     struct i965_coded_buffer_segment *coded_buffer_segment;
5382     VAEncPictureParameterBufferVP9 *pic_param;
5383     struct gen9_vp9_state *vp9_state;
5384     dri_bo *bo;
5385     int i;
5386
5387     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5388     if (!vp9_state ||
5389         !vp9_state->pic_param)
5390         return VA_STATUS_ERROR_INVALID_PARAMETER;
5391
5392     pic_param = vp9_state->pic_param;
5393
5394     /* reconstructed surface */
5395     obj_surface = encode_state->reconstructed_object;
5396     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5397
5398     dri_bo_unreference(pak_context->reconstructed_object.bo);
5399
5400     pak_context->reconstructed_object.bo = obj_surface->bo;
5401     dri_bo_reference(pak_context->reconstructed_object.bo);
5402
5403     /* set vp9 reference frames */
5404     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5405         if (pak_context->reference_surfaces[i].bo)
5406             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5407         pak_context->reference_surfaces[i].bo = NULL;
5408     }
5409
5410     /* Three reference frames are enough for VP9 */
5411     if (pic_param->pic_flags.bits.frame_type &&
5412         !pic_param->pic_flags.bits.intra_only) {
5413         for (i = 0; i < 3; i++) {
5414             obj_surface = encode_state->reference_objects[i];
5415             if (obj_surface && obj_surface->bo) {
5416                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5417                 dri_bo_reference(obj_surface->bo);
5418             }
5419         }
5420     }
5421
5422     /* input YUV surface */
5423     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5424     pak_context->uncompressed_picture_source.bo = NULL;
5425     obj_surface = encode_state->reconstructed_object;
5426     if (vp9_state->dys_in_use &&
5427         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5428          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5429         struct gen9_surface_vp9 *vp9_priv_surface =
5430             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5431         obj_surface = vp9_priv_surface->dys_surface_obj;
5432     } else
5433         obj_surface = encode_state->input_yuv_object;
5434
5435     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5436     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5437
5438     /* coded buffer */
5439     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5440     pak_context->indirect_pak_bse_object.bo = NULL;
5441     obj_buffer = encode_state->coded_buf_object;
5442     bo = obj_buffer->buffer_store->bo;
5443     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5444     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5445     pak_context->indirect_pak_bse_object.bo = bo;
5446     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5447
5448     /* set the internal flag to 0 to indicate the coded size is unknown */
5449     dri_bo_map(bo, 1);
5450     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5451     coded_buffer_segment->mapped = 0;
5452     coded_buffer_segment->codec = encoder_context->codec;
5453     coded_buffer_segment->status_support = 1;
5454     dri_bo_unmap(bo);
5455
5456     return VA_STATUS_SUCCESS;
5457 }
5458
/*
 * BRC prepare hook for the VP9 PAK stage. The body is empty: neither
 * argument is read and no state is changed.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    (void)encode_state;
    (void)encoder_context;
}
5464
5465 static void
5466 gen9_vp9_pak_context_destroy(void *context)
5467 {
5468     struct gen9_encoder_context_vp9 *pak_context = context;
5469     int i;
5470
5471     dri_bo_unreference(pak_context->reconstructed_object.bo);
5472     pak_context->reconstructed_object.bo = NULL;
5473
5474     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5475     pak_context->uncompressed_picture_source.bo = NULL;
5476
5477     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5478     pak_context->indirect_pak_bse_object.bo = NULL;
5479
5480     for (i = 0; i < 8; i++) {
5481         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5482         pak_context->reference_surfaces[i].bo = NULL;
5483     }
5484
5485     /* vme & pak same the same structure, so don't free the context here */
5486 }
5487
5488 static VAStatus
5489 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5490                       VAProfile profile,
5491                       struct encode_state *encode_state,
5492                       struct intel_encoder_context *encoder_context)
5493 {
5494     struct i965_driver_data *i965 = i965_driver_data(ctx);
5495     struct intel_batchbuffer *batch = encoder_context->base.batch;
5496     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5497     VAStatus va_status;
5498     struct gen9_vp9_state *vp9_state;
5499     VAEncPictureParameterBufferVP9 *pic_param;
5500     int i;
5501
5502     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5503
5504     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5505         return VA_STATUS_ERROR_INVALID_PARAMETER;
5506
5507     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5508
5509     if (va_status != VA_STATUS_SUCCESS)
5510         return va_status;
5511
5512     if (i965->intel.has_bsd2)
5513         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5514     else
5515         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5516
5517     intel_batchbuffer_emit_mi_flush(batch);
5518
5519     BEGIN_BCS_BATCH(batch, 64);
5520     for (i = 0; i < 64; i++)
5521         OUT_BCS_BATCH(batch, MI_NOOP);
5522
5523     ADVANCE_BCS_BATCH(batch);
5524
5525     for (vp9_state->curr_pak_pass = 0;
5526          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5527          vp9_state->curr_pak_pass++) {
5528
5529         if (vp9_state->curr_pak_pass == 0) {
5530             /* Initialize the VP9 Image Ctrl reg for the first pass */
5531             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5532             struct vp9_encode_status_buffer_internal *status_buffer;
5533
5534             status_buffer = &(vp9_state->status_buffer);
5535             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5536             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5537             mi_load_reg_imm.data = 0;
5538             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5539         }
5540         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5541         gen9_vp9_read_mfc_status(ctx, encoder_context);
5542     }
5543
5544     intel_batchbuffer_end_atomic(batch);
5545     intel_batchbuffer_flush(batch);
5546
5547     pic_param = vp9_state->pic_param;
5548     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5549     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5550     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5551     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5552     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5553     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5554     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5555     vp9_state->frame_number++;
5556     vp9_state->curr_mv_temporal_index ^= 1;
5557     vp9_state->first_frame = 0;
5558
5559     return VA_STATUS_SUCCESS;
5560 }
5561
5562 Bool
5563 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5564 {
5565     struct gen9_encoder_context_vp9 *vme_context = NULL;
5566     struct gen9_vp9_state *vp9_state = NULL;
5567
5568     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5569     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5570
5571     if (!vme_context || !vp9_state) {
5572         if (vme_context)
5573             free(vme_context);
5574         if (vp9_state)
5575             free(vp9_state);
5576         return false;
5577     }
5578
5579     encoder_context->enc_priv_state = vp9_state;
5580     vme_context->enc_priv_state = vp9_state;
5581
5582     /* Initialize the features that are supported by VP9 */
5583     vme_context->hme_supported = 1;
5584     vme_context->use_hw_scoreboard = 1;
5585     vme_context->use_hw_non_stalling_scoreboard = 1;
5586
5587     vp9_state->tx_mode = TX_MODE_SELECT;
5588     vp9_state->multi_ref_qp_check = 0;
5589     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5590     vp9_state->num_pak_passes = 1;
5591     vp9_state->hme_supported = vme_context->hme_supported;
5592     vp9_state->b16xme_supported = 1;
5593
5594     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5595         encoder_context->rate_control_mode != VA_RC_CQP) {
5596         vp9_state->brc_enabled = 1;
5597         vp9_state->brc_distortion_buffer_supported = 1;
5598         vp9_state->brc_constant_buffer_supported = 1;
5599         vp9_state->num_pak_passes = 4;
5600     }
5601     vp9_state->dys_enabled = 1; /* this is supported by default */
5602     vp9_state->first_frame = 1;
5603
5604     /* the definition of status buffer offset for VP9 */
5605     {
5606         struct vp9_encode_status_buffer_internal *status_buffer;
5607         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5608
5609         status_buffer = &vp9_state->status_buffer;
5610         memset(status_buffer, 0,
5611                sizeof(struct vp9_encode_status_buffer_internal));
5612
5613         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5614         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5615         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5616         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5617
5618         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5619         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5620         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5621     }
5622
5623     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5624
5625     encoder_context->vme_context = vme_context;
5626     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5627     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5628
5629     return true;
5630 }
5631
5632 static VAStatus
5633 gen9_vp9_get_coded_status(VADriverContextP ctx,
5634                           struct intel_encoder_context *encoder_context,
5635                           struct i965_coded_buffer_segment *coded_buf_seg)
5636 {
5637     struct vp9_encode_status *vp9_encode_status;
5638
5639     if (!encoder_context || !coded_buf_seg)
5640         return VA_STATUS_ERROR_INVALID_BUFFER;
5641
5642     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5643     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5644
5645     /* One VACodedBufferSegment for VP9 will be added later.
5646      * It will be linked to the next element of coded_buf_seg->base.next
5647      */
5648
5649     return VA_STATUS_SUCCESS;
5650 }
5651
5652 Bool
5653 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5654 {
5655     /* VME & PAK share the same context */
5656     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5657
5658     if (!pak_context)
5659         return false;
5660
5661     encoder_context->mfc_context = pak_context;
5662     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5663     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5664     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5665     encoder_context->get_status = gen9_vp9_get_coded_status;
5666     return true;
5667 }