OSDN Git Service

Remove decode code.
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
/* Upper limits for the VP9 encoder: frame rate and surface count. */
#define MAX_VP9_ENCODER_FRAMERATE       60
#define MAX_VP9_ENCODER_SURFACES        64

/* URB size, expressed in registers. */
#define MAX_URB_SIZE                    4096
#define NUM_KERNELS_PER_GPE_CONTEXT     1

/* NOTE(review): presumably the bits-per-kilobit scale used by BRC - confirm. */
#define VP9_BRC_KBPS                    1000

/* BRC kernel mode selectors; each value occupies a distinct single bit. */
#define BRC_KERNEL_CBR                  0x0010
#define BRC_KERNEL_VBR                  0x0020
#define BRC_KERNEL_AVBR                 0x0040
#define BRC_KERNEL_CQL                  0x0080

/* Unit size of a picture-state buffer; the allocator multiplies it by 4. */
#define VP9_PIC_STATE_BUFFER_SIZE       192
63
/*
 * One 32-bit entry of the kernel header table found at the start of the
 * kernel binary.
 *
 * The low 6 bits are reserved. The remaining 26 bits hold the kernel start
 * pointer, which intel_vp9_get_kernel_header_and_size() shifts left by 6 to
 * obtain a byte offset into the binary.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
68
69 typedef struct _intel_vp9_kernel_header {
70     int nKernelCount;
71     intel_kernel_header PLY_DSCALE;
72     intel_kernel_header VP9_ME_P;
73     intel_kernel_header VP9_Enc_I_32x32;
74     intel_kernel_header VP9_Enc_I_16x16;
75     intel_kernel_header VP9_Enc_P;
76     intel_kernel_header VP9_Enc_TX;
77     intel_kernel_header VP9_DYS;
78
79     intel_kernel_header VP9BRC_Intra_Distortion;
80     intel_kernel_header VP9BRC_Init;
81     intel_kernel_header VP9BRC_Reset;
82     intel_kernel_header VP9BRC_Update;
83 } intel_vp9_kernel_header;
84
/* Bit flags naming the dynamic-scaling (DYS) levels: 1x, 4x and 16x. */
#define DYS_1X_FLAG     0x01
#define DYS_4X_FLAG     0x02
#define DYS_16X_FLAG    0x04
88
/*
 * Frame dimensions requested by a caller of the surface-check helpers
 * (gen9_vp9_init_check_surfaces / gen9_vp9_check_dys_surfaces).
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* requested frame width */
    uint32_t frame_height;  /* requested frame height */
};
93
/*
 * Convert a signed integer into a sign-magnitude bit field.
 *
 * val:          value to encode.
 * sign_bit_pos: total width of the field in bits, sign bit included.
 *               The sign flag is placed at bit (sign_bit_pos - 1) and the
 *               magnitude occupies the bits below it.
 *
 * Returns the magnitude of val truncated to (sign_bit_pos - 1) bits, with
 * the sign flag set when val is negative. A negative value whose truncated
 * magnitude is zero still carries the sign flag. Returns 0 when
 * sign_bit_pos is outside 1..32, because no such field can be formed in a
 * 32-bit result.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t sign_bit, magnitude;

    /* Shifting by a negative count or by >= 32 is undefined; reject it. */
    if (sign_bit_pos < 1 || sign_bit_pos > 32)
        return 0;

    /* Unsigned shift: (1 << 31) on a signed int would overflow. */
    sign_bit = (uint32_t)1 << (sign_bit_pos - 1);

    /* Negate in unsigned arithmetic so that INT_MIN does not overflow. */
    magnitude = (val < 0) ? (0u - (uint32_t)val) : (uint32_t)val;
    magnitude &= sign_bit - 1;

    return (val < 0) ? (sign_bit | magnitude) : magnitude;
}
105
106 static bool
107 intel_vp9_get_kernel_header_and_size(
108     void                             *pvbinary,
109     int                              binary_size,
110     INTEL_VP9_ENC_OPERATION          operation,
111     int                              krnstate_idx,
112     struct i965_kernel               *ret_kernel)
113 {
114     typedef uint32_t BIN_PTR[4];
115
116     char *bin_start;
117     intel_vp9_kernel_header      *pkh_table;
118     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
119     int next_krnoffset;
120
121     if (!pvbinary || !ret_kernel)
122         return false;
123
124     bin_start = (char *)pvbinary;
125     pkh_table = (intel_vp9_kernel_header *)pvbinary;
126     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
127     next_krnoffset = binary_size;
128
129     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
130         pcurr_header = &pkh_table->PLY_DSCALE;
131     } else if (operation == INTEL_VP9_ENC_ME) {
132         pcurr_header = &pkh_table->VP9_ME_P;
133     } else if (operation == INTEL_VP9_ENC_MBENC) {
134         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
135     } else if (operation == INTEL_VP9_ENC_DYS) {
136         pcurr_header = &pkh_table->VP9_DYS;
137     } else if (operation == INTEL_VP9_ENC_BRC) {
138         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
139     } else {
140         return false;
141     }
142
143     pcurr_header += krnstate_idx;
144     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
145
146     pnext_header = (pcurr_header + 1);
147     if (pnext_header < pinvalid_entry) {
148         next_krnoffset = pnext_header->kernel_start_pointer << 6;
149     }
150     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
151
152     return true;
153 }
154
155
156 static void
157 gen9_free_surfaces_vp9(void **data)
158 {
159     struct gen9_surface_vp9 *vp9_surface;
160
161     if (!data || !*data)
162         return;
163
164     vp9_surface = *data;
165
166     if (vp9_surface->scaled_4x_surface_obj) {
167         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
168         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
169         vp9_surface->scaled_4x_surface_obj = NULL;
170     }
171
172     if (vp9_surface->scaled_16x_surface_obj) {
173         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
174         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
175         vp9_surface->scaled_16x_surface_obj = NULL;
176     }
177
178     if (vp9_surface->dys_4x_surface_obj) {
179         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
180         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
181         vp9_surface->dys_4x_surface_obj = NULL;
182     }
183
184     if (vp9_surface->dys_16x_surface_obj) {
185         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
186         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
187         vp9_surface->dys_16x_surface_obj = NULL;
188     }
189
190     if (vp9_surface->dys_surface_obj) {
191         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
192         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
193         vp9_surface->dys_surface_obj = NULL;
194     }
195
196     free(vp9_surface);
197
198     *data = NULL;
199
200     return;
201 }
202
203 static VAStatus
204 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
205                              struct object_surface *obj_surface,
206                              struct vp9_surface_param *surface_param)
207 {
208     struct i965_driver_data *i965 = i965_driver_data(ctx);
209     struct gen9_surface_vp9 *vp9_surface;
210     int downscaled_width_4x, downscaled_height_4x;
211     int downscaled_width_16x, downscaled_height_16x;
212
213     if (!obj_surface || !obj_surface->bo)
214         return VA_STATUS_ERROR_INVALID_SURFACE;
215
216     if (obj_surface->private_data &&
217         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
218         obj_surface->free_private_data(&obj_surface->private_data);
219         obj_surface->private_data = NULL;
220     }
221
222     if (obj_surface->private_data) {
223         /* if the frame width/height is already the same as the expected,
224          * it is unncessary to reallocate it.
225          */
226         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
227         if (vp9_surface->frame_width >= surface_param->frame_width ||
228             vp9_surface->frame_height >= surface_param->frame_height)
229             return VA_STATUS_SUCCESS;
230
231         obj_surface->free_private_data(&obj_surface->private_data);
232         obj_surface->private_data = NULL;
233         vp9_surface = NULL;
234     }
235
236     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
237
238     if (!vp9_surface)
239         return VA_STATUS_ERROR_ALLOCATION_FAILED;
240
241     vp9_surface->ctx = ctx;
242     obj_surface->private_data = vp9_surface;
243     obj_surface->free_private_data = gen9_free_surfaces_vp9;
244
245     vp9_surface->frame_width = surface_param->frame_width;
246     vp9_surface->frame_height = surface_param->frame_height;
247
248     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
249     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
250
251     i965_CreateSurfaces(ctx,
252                         downscaled_width_4x,
253                         downscaled_height_4x,
254                         VA_RT_FORMAT_YUV420,
255                         1,
256                         &vp9_surface->scaled_4x_surface_id);
257
258     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
259
260     if (!vp9_surface->scaled_4x_surface_obj) {
261         return VA_STATUS_ERROR_ALLOCATION_FAILED;
262     }
263
264     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
265                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
266
267     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
268     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
269     i965_CreateSurfaces(ctx,
270                         downscaled_width_16x,
271                         downscaled_height_16x,
272                         VA_RT_FORMAT_YUV420,
273                         1,
274                         &vp9_surface->scaled_16x_surface_id);
275     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
276
277     if (!vp9_surface->scaled_16x_surface_obj) {
278         return VA_STATUS_ERROR_ALLOCATION_FAILED;
279     }
280
281     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
282                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
283
284     return VA_STATUS_SUCCESS;
285 }
286
287 static VAStatus
288 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
289                             struct object_surface *obj_surface,
290                             struct vp9_surface_param *surface_param)
291 {
292     struct i965_driver_data *i965 = i965_driver_data(ctx);
293     struct gen9_surface_vp9 *vp9_surface;
294     int dys_width_4x, dys_height_4x;
295     int dys_width_16x, dys_height_16x;
296
297     /* As this is handled after the surface checking, it is unnecessary
298      * to check the surface bo and vp9_priv_surface again
299      */
300
301     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
302
303     if (!vp9_surface)
304         return VA_STATUS_ERROR_INVALID_SURFACE;
305
306     /* if the frame_width/height of dys_surface is the same as
307      * the expected, it is unnecessary to allocate it again
308      */
309     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
310         vp9_surface->dys_frame_width == surface_param->frame_width)
311         return VA_STATUS_SUCCESS;
312
313     if (vp9_surface->dys_4x_surface_obj) {
314         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
315         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
316         vp9_surface->dys_4x_surface_obj = NULL;
317     }
318
319     if (vp9_surface->dys_16x_surface_obj) {
320         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
321         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
322         vp9_surface->dys_16x_surface_obj = NULL;
323     }
324
325     if (vp9_surface->dys_surface_obj) {
326         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
327         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
328         vp9_surface->dys_surface_obj = NULL;
329     }
330
331     vp9_surface->dys_frame_width = surface_param->frame_width;
332     vp9_surface->dys_frame_height = surface_param->frame_height;
333
334     i965_CreateSurfaces(ctx,
335                         surface_param->frame_width,
336                         surface_param->frame_height,
337                         VA_RT_FORMAT_YUV420,
338                         1,
339                         &vp9_surface->dys_surface_id);
340     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
341
342     if (!vp9_surface->dys_surface_obj) {
343         return VA_STATUS_ERROR_ALLOCATION_FAILED;
344     }
345
346     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
347                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
348
349     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
350     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
351
352     i965_CreateSurfaces(ctx,
353                         dys_width_4x,
354                         dys_height_4x,
355                         VA_RT_FORMAT_YUV420,
356                         1,
357                         &vp9_surface->dys_4x_surface_id);
358
359     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
360
361     if (!vp9_surface->dys_4x_surface_obj) {
362         return VA_STATUS_ERROR_ALLOCATION_FAILED;
363     }
364
365     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
366                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
367
368     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
369     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
370     i965_CreateSurfaces(ctx,
371                         dys_width_16x,
372                         dys_height_16x,
373                         VA_RT_FORMAT_YUV420,
374                         1,
375                         &vp9_surface->dys_16x_surface_id);
376     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
377
378     if (!vp9_surface->dys_16x_surface_obj) {
379         return VA_STATUS_ERROR_ALLOCATION_FAILED;
380     }
381
382     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
383                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
384
385     return VA_STATUS_SUCCESS;
386 }
387
388 static VAStatus
389 gen9_vp9_allocate_resources(VADriverContextP ctx,
390                             struct encode_state *encode_state,
391                             struct intel_encoder_context *encoder_context,
392                             int allocate)
393 {
394     struct i965_driver_data *i965 = i965_driver_data(ctx);
395     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
396     struct gen9_vp9_state *vp9_state;
397     int allocate_flag, i;
398     int res_size;
399     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
400     unsigned int width, height;
401
402     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
403
404     if (!vp9_state || !vp9_state->pic_param)
405         return VA_STATUS_ERROR_INVALID_PARAMETER;
406
407     /* the buffer related with BRC is not changed. So it is allocated
408      * based on the input parameter
409      */
410     if (allocate) {
411         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
412         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
413         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
414         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
415         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
416         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
417         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
418         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
419         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
420         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
421         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
422
423         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
424                                                    &vme_context->res_brc_history_buffer,
425                                                    VP9_BRC_HISTORY_BUFFER_SIZE,
426                                                    "Brc History buffer");
427         if (!allocate_flag)
428             goto failed_allocation;
429         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
430                                                    &vme_context->res_brc_const_data_buffer,
431                                                    VP9_BRC_CONSTANTSURFACE_SIZE,
432                                                    "Brc Constant buffer");
433         if (!allocate_flag)
434             goto failed_allocation;
435
436         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
437                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
438         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
439                                                    &vme_context->res_brc_mbenc_curbe_write_buffer,
440                                                    res_size,
441                                                    "Brc Curbe write");
442         if (!allocate_flag)
443             goto failed_allocation;
444
445         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
446         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
447                                                    &vme_context->res_pic_state_brc_read_buffer,
448                                                    res_size,
449                                                    "Pic State Brc_read");
450         if (!allocate_flag)
451             goto failed_allocation;
452
453         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
454         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455                                                    &vme_context->res_pic_state_brc_write_hfw_read_buffer,
456                                                    res_size,
457                                                    "Pic State Brc_write Hfw_Read");
458         if (!allocate_flag)
459             goto failed_allocation;
460
461         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463                                                    &vme_context->res_pic_state_hfw_write_buffer,
464                                                    res_size,
465                                                    "Pic State Hfw Write");
466         if (!allocate_flag)
467             goto failed_allocation;
468
469         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
470         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471                                                    &vme_context->res_seg_state_brc_read_buffer,
472                                                    res_size,
473                                                    "Segment state brc_read");
474         if (!allocate_flag)
475             goto failed_allocation;
476
477         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
478         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479                                                    &vme_context->res_seg_state_brc_write_buffer,
480                                                    res_size,
481                                                    "Segment state brc_write");
482         if (!allocate_flag)
483             goto failed_allocation;
484
485         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
486         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487                                                    &vme_context->res_brc_bitstream_size_buffer,
488                                                    res_size,
489                                                    "Brc bitstream buffer");
490         if (!allocate_flag)
491             goto failed_allocation;
492
493         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
494         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495                                                    &vme_context->res_brc_hfw_data_buffer,
496                                                    res_size,
497                                                    "mfw Brc data");
498         if (!allocate_flag)
499             goto failed_allocation;
500
501         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
502         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503                                                    &vme_context->res_brc_mmdk_pak_buffer,
504                                                    res_size,
505                                                    "Brc mmdk_pak");
506         if (!allocate_flag)
507             goto failed_allocation;
508     }
509
510     /* If the width/height of allocated buffer is greater than the expected,
511      * it is unnecessary to allocate it again
512      */
513     if (vp9_state->res_width >= vp9_state->frame_width &&
514         vp9_state->res_height >= vp9_state->frame_height) {
515
516         return VA_STATUS_SUCCESS;
517     }
518     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
519     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
520     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
521
522     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
523     res_size = frame_width_in_sb * 64;
524     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
525                                                &vme_context->res_hvd_line_buffer,
526                                                res_size,
527                                                "VP9 hvd line line");
528     if (!allocate_flag)
529         goto failed_allocation;
530
531     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
532     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
533                                                &vme_context->res_hvd_tile_line_buffer,
534                                                res_size,
535                                                "VP9 hvd tile_line line");
536     if (!allocate_flag)
537         goto failed_allocation;
538
539     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
540     res_size = frame_width_in_sb * 18 * 64;
541     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
542                                                &vme_context->res_deblocking_filter_line_buffer,
543                                                res_size,
544                                                "VP9 deblocking filter line");
545     if (!allocate_flag)
546         goto failed_allocation;
547
548     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
549     res_size = frame_width_in_sb * 18 * 64;
550     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
551                                                &vme_context->res_deblocking_filter_tile_line_buffer,
552                                                res_size,
553                                                "VP9 deblocking tile line");
554     if (!allocate_flag)
555         goto failed_allocation;
556
557     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
558     res_size = frame_height_in_sb * 17 * 64;
559     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
560                                                &vme_context->res_deblocking_filter_tile_col_buffer,
561                                                res_size,
562                                                "VP9 deblocking tile col");
563     if (!allocate_flag)
564         goto failed_allocation;
565
566     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
567     res_size = frame_width_in_sb * 5 * 64;
568     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
569                                                &vme_context->res_metadata_line_buffer,
570                                                res_size,
571                                                "VP9 metadata line");
572     if (!allocate_flag)
573         goto failed_allocation;
574
575     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
576     res_size = frame_width_in_sb * 5 * 64;
577     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
578                                                &vme_context->res_metadata_tile_line_buffer,
579                                                res_size,
580                                                "VP9 metadata tile line");
581     if (!allocate_flag)
582         goto failed_allocation;
583
584     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
585     res_size = frame_height_in_sb * 5 * 64;
586     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
587                                                &vme_context->res_metadata_tile_col_buffer,
588                                                res_size,
589                                                "VP9 metadata tile col");
590     if (!allocate_flag)
591         goto failed_allocation;
592
593     i965_free_gpe_resource(&vme_context->res_prob_buffer);
594     res_size = 2048;
595     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
596                                                &vme_context->res_prob_buffer,
597                                                res_size,
598                                                "VP9 prob");
599     if (!allocate_flag)
600         goto failed_allocation;
601
602     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
603     res_size = frame_sb_num * 64;
604     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
605                                                &vme_context->res_segmentid_buffer,
606                                                res_size,
607                                                "VP9 segment id");
608     if (!allocate_flag)
609         goto failed_allocation;
610
611     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
612
613     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
614     res_size = 29 * 64;
615     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
616                                                &vme_context->res_prob_delta_buffer,
617                                                res_size,
618                                                "VP9 prob delta");
619     if (!allocate_flag)
620         goto failed_allocation;
621
622     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
623
624     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
625     res_size = 29 * 64;
626     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
627                                                &vme_context->res_prob_delta_buffer,
628                                                res_size,
629                                                "VP9 prob delta");
630     if (!allocate_flag)
631         goto failed_allocation;
632
633     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
634     res_size = 32 * 64;
635     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
636                                                &vme_context->res_compressed_input_buffer,
637                                                res_size,
638                                                "VP9 compressed_input buffer");
639     if (!allocate_flag)
640         goto failed_allocation;
641
642     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
643     res_size = 193 * 64;
644     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
645                                                &vme_context->res_prob_counter_buffer,
646                                                res_size,
647                                                "VP9 prob counter");
648     if (!allocate_flag)
649         goto failed_allocation;
650
651     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
652     res_size = frame_sb_num * 64;
653     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
654                                                &vme_context->res_tile_record_streamout_buffer,
655                                                res_size,
656                                                "VP9 tile record stream_out");
657     if (!allocate_flag)
658         goto failed_allocation;
659
660     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
661     res_size = frame_sb_num * 64;
662     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
663                                                &vme_context->res_cu_stat_streamout_buffer,
664                                                res_size,
665                                                "VP9 CU stat stream_out");
666     if (!allocate_flag)
667         goto failed_allocation;
668
669     width = vp9_state->downscaled_width_4x_in_mb * 32;
670     height = vp9_state->downscaled_height_4x_in_mb * 16;
671     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
672     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
673                                                   &vme_context->s4x_memv_data_buffer,
674                                                   width, height,
675                                                   ALIGN(width, 64),
676                                                   "VP9 4x MEMV data");
677     if (!allocate_flag)
678         goto failed_allocation;
679
680     width = vp9_state->downscaled_width_4x_in_mb * 8;
681     height = vp9_state->downscaled_height_4x_in_mb * 16;
682     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
683     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
684                                                   &vme_context->s4x_memv_distortion_buffer,
685                                                   width, height,
686                                                   ALIGN(width, 64),
687                                                   "VP9 4x MEMV distorion");
688     if (!allocate_flag)
689         goto failed_allocation;
690
691     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
692     height = vp9_state->downscaled_height_16x_in_mb * 16;
693     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
694     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
695                                                   &vme_context->s16x_memv_data_buffer,
696                                                   width, height,
697                                                   width,
698                                                   "VP9 16x MEMV data");
699     if (!allocate_flag)
700         goto failed_allocation;
701
702     width = vp9_state->frame_width_in_mb * 16;
703     height = vp9_state->frame_height_in_mb * 8;
704     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
705     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
706                                                   &vme_context->res_output_16x16_inter_modes,
707                                                   width, height,
708                                                   ALIGN(width, 64),
709                                                   "VP9 output inter_mode");
710     if (!allocate_flag)
711         goto failed_allocation;
712
713     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
714                16 * 4;
715     for (i = 0; i < 2; i++) {
716         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
717         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
718                                                    &vme_context->res_mode_decision[i],
719                                                    res_size,
720                                                    "VP9 mode decision");
721         if (!allocate_flag)
722             goto failed_allocation;
723
724     }
725
726     res_size = frame_sb_num * 9 * 64;
727     for (i = 0; i < 2; i++) {
728         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
729         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
730                                                    &vme_context->res_mv_temporal_buffer[i],
731                                                    res_size,
732                                                    "VP9 temporal mv");
733         if (!allocate_flag)
734             goto failed_allocation;
735     }
736
737     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
738     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
739     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
740     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
741                                                &vme_context->res_mb_code_surface,
742                                                ALIGN(res_size, 4096),
743                                                "VP9 mb_code surface");
744     if (!allocate_flag)
745         goto failed_allocation;
746
747     res_size = 128;
748     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
749     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
750                                                &vme_context->res_pak_uncompressed_input_buffer,
751                                                ALIGN(res_size, 4096),
752                                                "VP9 pak_uncompressed_input");
753     if (!allocate_flag)
754         goto failed_allocation;
755
756     if (!vme_context->frame_header_data) {
757         /* allocate 512 bytes for generating the uncompressed header */
758         vme_context->frame_header_data = calloc(1, 512);
759     }
760
761     vp9_state->res_width = vp9_state->frame_width;
762     vp9_state->res_height = vp9_state->frame_height;
763
764     return VA_STATUS_SUCCESS;
765
766 failed_allocation:
767     return VA_STATUS_ERROR_ALLOCATION_FAILED;
768 }
769
770 static void
771 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
772 {
773     int i;
774     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
775
776     if (vp9_state->brc_enabled) {
777         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
778         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
779         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
780         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
781         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
782         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
783         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
784         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
785         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
786         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
787         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
788     }
789
790     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
791     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
792     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
793     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
794     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
795     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
796     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
797     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
798     i965_free_gpe_resource(&vme_context->res_prob_buffer);
799     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
800     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
801     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
802     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
803     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
804     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
805     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
806     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
807     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
808     for (i = 0; i < 2; i++) {
809         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
810     }
811
812     for (i = 0; i < 2; i++) {
813         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
814     }
815
816     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
817     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
818     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
819
820     if (vme_context->frame_header_data) {
821         free(vme_context->frame_header_data);
822         vme_context->frame_header_data = NULL;
823     }
824     return;
825 }
826
827 static void
828 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
829                                         struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
830                                         struct gpe_media_object_walker_parameter *walker_param)
831 {
832     memset(walker_param, 0, sizeof(*walker_param));
833
834     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
835
836     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
837     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
838
839     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
840     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
841
842     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
843     walker_param->global_outer_loop_stride.y = 0;
844
845     walker_param->global_inner_loop_unit.x = 0;
846     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
847
848     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
849     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
850
851     if (kernel_walker_param->no_dependency) {
852         walker_param->scoreboard_mask = 0;
853         walker_param->use_scoreboard = 0;
854         // Raster scan walking pattern
855         walker_param->local_outer_loop_stride.x = 0;
856         walker_param->local_outer_loop_stride.y = 1;
857         walker_param->local_inner_loop_unit.x = 1;
858         walker_param->local_inner_loop_unit.y = 0;
859         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
860         walker_param->local_end.y = 0;
861     } else {
862         walker_param->local_end.x = 0;
863         walker_param->local_end.y = 0;
864
865         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
866             // 45z degree
867             walker_param->scoreboard_mask = 0x0F;
868
869             walker_param->global_loop_exec_count = 0x3FF;
870             walker_param->local_loop_exec_count = 0x3FF;
871
872             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
873             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
874
875             walker_param->global_start.x = 0;
876             walker_param->global_start.y = 0;
877
878             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
879             walker_param->global_outer_loop_stride.y = 0;
880
881             walker_param->global_inner_loop_unit.x = 0;
882             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
883
884             walker_param->block_resolution.x = walker_param->global_resolution.x;
885             walker_param->block_resolution.y = walker_param->global_resolution.y;
886
887             walker_param->local_start.x = 0;
888             walker_param->local_start.y = 0;
889
890             walker_param->local_outer_loop_stride.x = 1;
891             walker_param->local_outer_loop_stride.y = 0;
892
893             walker_param->local_inner_loop_unit.x = -1;
894             walker_param->local_inner_loop_unit.y = 4;
895
896             walker_param->middle_loop_extra_steps = 3;
897             walker_param->mid_loop_unit_x = 0;
898             walker_param->mid_loop_unit_y = 1;
899         } else {
900             // 26 degree
901             walker_param->scoreboard_mask = 0x0F;
902             walker_param->local_outer_loop_stride.x = 1;
903             walker_param->local_outer_loop_stride.y = 0;
904             walker_param->local_inner_loop_unit.x = -2;
905             walker_param->local_inner_loop_unit.y = 1;
906         }
907     }
908 }
909
910 static void
911 gen9_run_kernel_media_object(VADriverContextP ctx,
912                              struct intel_encoder_context *encoder_context,
913                              struct i965_gpe_context *gpe_context,
914                              int media_function,
915                              struct gpe_media_object_parameter *param)
916 {
917     struct intel_batchbuffer *batch = encoder_context->base.batch;
918     struct vp9_encode_status_buffer_internal *status_buffer;
919     struct gen9_vp9_state *vp9_state;
920     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
921
922     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
923     if (!vp9_state || !batch)
924         return;
925
926     intel_batchbuffer_start_atomic(batch, 0x1000);
927
928     status_buffer = &(vp9_state->status_buffer);
929     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
930     mi_store_data_imm.bo = status_buffer->bo;
931     mi_store_data_imm.offset = status_buffer->media_index_offset;
932     mi_store_data_imm.dw0 = media_function;
933     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
934
935     intel_batchbuffer_emit_mi_flush(batch);
936     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
937     gen8_gpe_media_object(ctx, gpe_context, batch, param);
938     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
939
940     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
941
942     intel_batchbuffer_end_atomic(batch);
943
944     intel_batchbuffer_flush(batch);
945 }
946
947 static void
948 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
949                                     struct intel_encoder_context *encoder_context,
950                                     struct i965_gpe_context *gpe_context,
951                                     int media_function,
952                                     struct gpe_media_object_walker_parameter *param)
953 {
954     struct intel_batchbuffer *batch = encoder_context->base.batch;
955     struct vp9_encode_status_buffer_internal *status_buffer;
956     struct gen9_vp9_state *vp9_state;
957     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
958
959     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
960     if (!vp9_state || !batch)
961         return;
962
963     intel_batchbuffer_start_atomic(batch, 0x1000);
964
965     intel_batchbuffer_emit_mi_flush(batch);
966
967     status_buffer = &(vp9_state->status_buffer);
968     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
969     mi_store_data_imm.bo = status_buffer->bo;
970     mi_store_data_imm.offset = status_buffer->media_index_offset;
971     mi_store_data_imm.dw0 = media_function;
972     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
973
974     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
975     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
976     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
977
978     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
979
980     intel_batchbuffer_end_atomic(batch);
981
982     intel_batchbuffer_flush(batch);
983 }
984
985 static
986 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
987                             struct encode_state *encode_state,
988                             struct i965_gpe_context *gpe_context,
989                             struct intel_encoder_context *encoder_context,
990                             struct gen9_vp9_brc_curbe_param *param)
991 {
992     VAEncSequenceParameterBufferVP9 *seq_param;
993     VAEncPictureParameterBufferVP9  *pic_param;
994     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
995     vp9_brc_curbe_data      *cmd;
996     double                  dbps_ratio, dInputBitsPerFrame;
997     struct gen9_vp9_state *vp9_state;
998
999     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1000
1001     pic_param      = param->ppic_param;
1002     seq_param      = param->pseq_param;
1003     segment_param  = param->psegment_param;
1004
1005     cmd = i965_gpe_context_map_curbe(gpe_context);
1006
1007     if (!cmd)
1008         return;
1009
1010     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1011
1012     if (!vp9_state->dys_enabled) {
1013         cmd->dw0.frame_width  = pic_param->frame_width_src;
1014         cmd->dw0.frame_height = pic_param->frame_height_src;
1015     } else {
1016         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1017         cmd->dw0.frame_height = pic_param->frame_height_dst;
1018     }
1019
1020     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1021     cmd->dw1.segmentation_enable  = 0;
1022     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1023     cmd->dw1.num_tlevels          = 1;
1024
1025     switch (param->media_state_type) {
1026     case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1027         cmd->dw3.max_level_ratiot0 = 0;
1028         cmd->dw3.max_level_ratiot1 = 0;
1029         cmd->dw3.max_level_ratiot2 = 0;
1030         cmd->dw3.max_level_ratiot3 = 0;
1031
1032         cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1033                                               seq_param->max_frame_height;
1034         cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1035         cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1036         cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1037                                              VP9_BRC_KBPS;
1038         cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1039                                           VP9_BRC_KBPS;
1040         cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1041                                           VP9_BRC_KBPS;
1042         cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1043         cmd->dw11.frame_rated           = vp9_state->framerate.den;
1044
1045         cmd->dw14.avbr_accuracy         = 30;
1046         cmd->dw14.avbr_convergence      = 150;
1047
1048         if (encoder_context->rate_control_mode == VA_RC_CBR) {
1049             cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1050             cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1051             cmd->dw9.min_bit_rate  = 0;
1052         } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1053             cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1054         } else {
1055             cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1056             cmd->dw16.cq_level = 30;
1057         }
1058         cmd->dw12.gopp = seq_param->intra_period - 1;
1059
1060         cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1061         cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1062
1063         cmd->dw15.min_qp          = 1;
1064         cmd->dw15.max_qp          = 255;
1065
1066         cmd->dw16.cq_level            = 30;
1067
1068         cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1069         cmd->dw17.brc_overshoot_cbr_pct = 150;
1070
1071         dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1072         dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1073         if (dbps_ratio < 0.1)
1074             dbps_ratio = 0.1;
1075         if (dbps_ratio > 3.5)
1076             dbps_ratio = 3.5;
1077
1078         *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1079         *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1080         *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
1081
1082         cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1083         cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1084         cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1085         cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1086         cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1087         cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1088         cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1089         cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1090
1091         cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1092         cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1093         cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1094         cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1095         cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1096         cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1097         cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1098         cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1099
1100         cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1101         cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1102         cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1103         cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1104         cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1105         cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1106         cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1107         cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1108
1109         break;
1110     }
1111     case VP9_MEDIA_STATE_BRC_UPDATE: {
1112         cmd->dw15.min_qp          = 1;
1113         cmd->dw15.max_qp          = 255;
1114
1115         cmd->dw25.frame_number    = param->frame_number;
1116
1117         // Used in dynamic scaling. set to zero for now
1118         cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1119         cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1120
1121         if (pic_param->pic_flags.bits.segmentation_enabled) {
1122             cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1123             cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1124             cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1125             cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1126
1127             cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1128             cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1129             cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1130             cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1131         }
1132
1133         //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1134         cmd->dw34.temporal_id                = 0;
1135         cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1136
1137         cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1138         cmd->dw35.sync_async                 = 0;
1139         cmd->dw35.mbrc                       = param->mbbrc_enabled;
1140         if (*param->pbrc_init_current_target_buf_full_in_bits >
1141             ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1142             *param->pbrc_init_current_target_buf_full_in_bits -=
1143                 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1144             cmd->dw35.overflow = 1;
1145         } else
1146             cmd->dw35.overflow = 0;
1147
1148         cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1149
1150         cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1151
1152         *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1153
1154         cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1155         cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1156         cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1157
1158         break;
1159     }
1160     case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1161         cmd->dw2.intra_mode_disable        = 0;
1162         break;
1163     default:
1164         break;
1165     }
1166
1167     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1168     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1169     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1170     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1171     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1172     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1173     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1174     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1175     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1176     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1177     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1178     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1179     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1180     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1181
1182     i965_gpe_context_unmap_curbe(gpe_context);
1183     return;
1184 }
1185
1186 static void
1187 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1188                                      struct encode_state *encode_state,
1189                                      struct intel_encoder_context *encoder_context,
1190                                      struct i965_gpe_context *gpe_context)
1191 {
1192     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1193
1194     i965_add_buffer_gpe_surface(ctx,
1195                                 gpe_context,
1196                                 &vme_context->res_brc_history_buffer,
1197                                 0,
1198                                 vme_context->res_brc_history_buffer.size,
1199                                 0,
1200                                 VP9_BTI_BRC_HISTORY_G9);
1201
1202     i965_add_buffer_2d_gpe_surface(ctx,
1203                                    gpe_context,
1204                                    &vme_context->s4x_memv_distortion_buffer,
1205                                    1,
1206                                    I965_SURFACEFORMAT_R8_UNORM,
1207                                    VP9_BTI_BRC_DISTORTION_G9);
1208 }
1209
1210 /* The function related with BRC */
1211 static VAStatus
1212 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1213                                struct encode_state *encode_state,
1214                                struct intel_encoder_context *encoder_context)
1215 {
1216     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1217     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1218     struct gpe_media_object_parameter media_object_param;
1219     struct i965_gpe_context *gpe_context;
1220     int gpe_index = VP9_BRC_INIT;
1221     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1222     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1223     VAEncPictureParameterBufferVP9 *pic_param;
1224     struct gen9_vp9_state *vp9_state;
1225
1226     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1227
1228     if (!vp9_state || !vp9_state->pic_param)
1229         return VA_STATUS_ERROR_INVALID_PARAMETER;
1230
1231     pic_param = vp9_state->pic_param;
1232
1233     if (vp9_state->brc_inited)
1234         gpe_index = VP9_BRC_RESET;
1235
1236     gpe_context = &brc_context->gpe_contexts[gpe_index];
1237
1238     gen8_gpe_context_init(ctx, gpe_context);
1239     gen9_gpe_reset_binding_table(ctx, gpe_context);
1240
1241     brc_initreset_curbe.media_state_type    = media_function;
1242     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1243     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1244     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1245     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1246     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1247     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1248     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1249         &vp9_state->brc_init_current_target_buf_full_in_bits;
1250     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1251         &vp9_state->brc_init_reset_buf_size_in_bits;
1252     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1253         &vp9_state->brc_init_reset_input_bits_per_frame;
1254     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1255     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1256     brc_initreset_curbe.mbbrc_enabled      = 0;
1257     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1258
1259     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1260                                    gpe_context,
1261                                    encoder_context,
1262                                    &brc_initreset_curbe);
1263
1264     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1265     gen8_gpe_setup_interface_data(ctx, gpe_context);
1266
1267     memset(&media_object_param, 0, sizeof(media_object_param));
1268     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1269
1270     return VA_STATUS_SUCCESS;
1271 }
1272
1273 static void
1274 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1275                                      struct encode_state *encode_state,
1276                                      struct intel_encoder_context *encoder_context,
1277                                      struct i965_gpe_context *gpe_context)
1278 {
1279     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1280
1281     struct object_surface *obj_surface;
1282     struct gen9_surface_vp9 *vp9_priv_surface;
1283
1284     /* sScaled4xSurface surface */
1285     obj_surface = encode_state->reconstructed_object;
1286
1287     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1288
1289     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1290     i965_add_2d_gpe_surface(ctx, gpe_context,
1291                             obj_surface,
1292                             0, 1,
1293                             I965_SURFACEFORMAT_R8_UNORM,
1294                             VP9_BTI_BRC_SRCY4X_G9
1295                            );
1296
1297     i965_add_adv_gpe_surface(ctx, gpe_context,
1298                              obj_surface,
1299                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1300
1301     i965_add_buffer_2d_gpe_surface(ctx,
1302                                    gpe_context,
1303                                    &vme_context->s4x_memv_distortion_buffer,
1304                                    1,
1305                                    I965_SURFACEFORMAT_R8_UNORM,
1306                                    VP9_BTI_BRC_DISTORTION_G9);
1307
1308     return;
1309 }
1310
1311 /* The function related with BRC */
1312 static VAStatus
1313 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1314                                struct encode_state *encode_state,
1315                                struct intel_encoder_context *encoder_context)
1316 {
1317     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1318     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1319     struct i965_gpe_context *gpe_context;
1320     int gpe_index = VP9_BRC_INTRA_DIST;
1321     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1322     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1323     VAEncPictureParameterBufferVP9 *pic_param;
1324     struct gen9_vp9_state *vp9_state;
1325     struct gpe_media_object_walker_parameter media_object_walker_param;
1326     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1327
1328     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1329
1330     if (!vp9_state || !vp9_state->pic_param)
1331         return VA_STATUS_ERROR_INVALID_PARAMETER;
1332
1333     pic_param = vp9_state->pic_param;
1334
1335     gpe_context = &brc_context->gpe_contexts[gpe_index];
1336
1337     gen8_gpe_context_init(ctx, gpe_context);
1338     gen9_gpe_reset_binding_table(ctx, gpe_context);
1339
1340     brc_intra_dist_curbe.media_state_type    = media_function;
1341     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1342     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1343     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1344     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1345     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1346     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1347     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1348         &vp9_state->brc_init_current_target_buf_full_in_bits;
1349     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1350         &vp9_state->brc_init_reset_buf_size_in_bits;
1351     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1352         &vp9_state->brc_init_reset_input_bits_per_frame;
1353     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1354     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1355     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1356     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1357
1358     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1359                                    gpe_context,
1360                                    encoder_context,
1361                                    &brc_intra_dist_curbe);
1362
1363     /* zero distortion buffer */
1364     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1365
1366     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1367     gen8_gpe_setup_interface_data(ctx, gpe_context);
1368
1369     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1370     kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1371     kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1372     kernel_walker_param.no_dependency = 1;
1373
1374     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1375
1376     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1377                                         gpe_context,
1378                                         media_function,
1379                                         &media_object_walker_param);
1380
1381     return VA_STATUS_SUCCESS;
1382 }
1383
1384 static void
1385 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1386                                          struct encode_state *encode_state,
1387                                          struct intel_encoder_context *encoder_context,
1388                                          struct i965_gpe_resource *gpe_resource)
1389 {
1390     struct gen9_vp9_state *vp9_state;
1391     VAEncPictureParameterBufferVP9 *pic_param;
1392     int frame_width_minus1, frame_height_minus1;
1393     int is_lossless = 0;
1394     int is_intra_only = 0;
1395     unsigned int last_frame_type;
1396     unsigned int ref_flags;
1397     unsigned int use_prev_frame_mvs, adapt_flag;
1398     struct gen9_surface_vp9 *vp9_surface = NULL;
1399     struct object_surface *obj_surface = NULL;
1400     uint32_t scale_h = 0;
1401     uint32_t scale_w = 0;
1402
1403     char *pdata;
1404     int i, j;
1405     unsigned int *cmd_ptr, cmd_value, tmp;
1406
1407     pdata = i965_map_gpe_resource(gpe_resource);
1408     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1409
1410     if (!vp9_state || !vp9_state->pic_param || !pdata)
1411         return;
1412
1413     pic_param = vp9_state->pic_param;
1414     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1415     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1416     if ((pic_param->luma_ac_qindex == 0) &&
1417         (pic_param->luma_dc_qindex_delta == 0) &&
1418         (pic_param->chroma_ac_qindex_delta == 0) &&
1419         (pic_param->chroma_dc_qindex_delta == 0))
1420         is_lossless = 1;
1421
1422     if (pic_param->pic_flags.bits.frame_type)
1423         is_intra_only = pic_param->pic_flags.bits.intra_only;
1424
1425     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1426
1427     use_prev_frame_mvs = 0;
1428     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1429         last_frame_type = 0;
1430         ref_flags = 0;
1431     } else {
1432         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1433                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1434                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1435                     );
1436         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1437             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1438             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1439             !pic_param->pic_flags.bits.intra_only &&
1440             vp9_state->vp9_last_frame.show_frame &&
1441             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1442              !vp9_state->vp9_last_frame.intra_only)
1443            )
1444             use_prev_frame_mvs = 1;
1445     }
1446     adapt_flag = 0;
1447     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1448         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1449         adapt_flag = 1;
1450
1451     for (i = 0; i < 4; i++) {
1452         uint32_t non_first_pass;
1453         non_first_pass = 1;
1454         if (i == 0)
1455             non_first_pass = 0;
1456
1457         cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1458
1459         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1460         *cmd_ptr++ = (frame_height_minus1 << 16 |
1461                       frame_width_minus1);
1462         /* dw2 */
1463         *cmd_ptr++ = (0 << 31 |  /* disable segment_in */
1464                       0 << 30 | /* disable segment_out */
1465                       is_lossless << 29 | /* loseless */
1466                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1467                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1468                       (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1469                       (pic_param->sharpness_level << 23) |
1470                       (pic_param->filter_level << 17) |
1471                       (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1472                       (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1473                       (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1474                       (last_frame_type << 13) |
1475                       (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1476                       (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1477                       (use_prev_frame_mvs) << 10 |
1478                       ref_flags |
1479                       (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1480                       (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1481                       (is_intra_only << 2) |
1482                       (adapt_flag << 1) |
1483                       (pic_param->pic_flags.bits.frame_type) << 0);
1484
1485         *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1486                       (0 << 24) | /* 8-bit depth */
1487                       (0 << 22) | /* only 420 format */
1488                       (0 << 0)  | /* sse statistics */
1489                       (pic_param->log2_tile_rows << 8) |
1490                       (pic_param->log2_tile_columns << 0));
1491
1492         /* dw4..6 */
1493         if (pic_param->pic_flags.bits.frame_type &&
1494             !pic_param->pic_flags.bits.intra_only) {
1495             for (j = 0; j < 3; j++) {
1496                 obj_surface = encode_state->reference_objects[j];
1497                 scale_w = 0;
1498                 scale_h = 0;
1499                 if (obj_surface && obj_surface->private_data) {
1500                     vp9_surface = obj_surface->private_data;
1501                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1502                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1503                     *cmd_ptr++ = (scale_w << 16 |
1504                                   scale_h);
1505                 } else
1506                     *cmd_ptr++ = 0;
1507             }
1508         } else {
1509             *cmd_ptr++ = 0;
1510             *cmd_ptr++ = 0;
1511             *cmd_ptr++ = 0;
1512         }
1513         /* dw7..9 */
1514         for (j = 0; j < 3; j++) {
1515             obj_surface = encode_state->reference_objects[j];
1516             vp9_surface = NULL;
1517
1518             if (obj_surface && obj_surface->private_data) {
1519                 vp9_surface = obj_surface->private_data;
1520                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1521                              (vp9_surface->frame_width - 1);
1522             } else
1523                 *cmd_ptr++ = 0;
1524         }
1525         /* dw10 */
1526         *cmd_ptr++ = 0;
1527         /* dw11 */
1528         *cmd_ptr++ = (1 << 1);
1529         *cmd_ptr++ = 0;
1530
1531         /* dw13 */
1532         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1533                       (0 << 24) | /* tail insertation */
1534                       (pic_param->luma_ac_qindex << 16) |
1535                       0 /* compressed header bin count */);
1536
1537         /* dw14 */
1538         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1539         cmd_value = (tmp << 16);
1540         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1541         cmd_value |= (tmp << 8);
1542         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1543         cmd_value |= tmp;
1544         *cmd_ptr++ = cmd_value;
1545
1546         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1547         cmd_value = tmp;
1548         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1549         cmd_value |= (tmp << 8);
1550         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1551         cmd_value |= (tmp << 16);
1552         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1553         cmd_value |= (tmp << 24);
1554         *cmd_ptr++ = cmd_value;
1555
1556         /* dw16 */
1557         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1558         cmd_value = tmp;
1559         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1560         cmd_value |= (tmp << 8);
1561         *cmd_ptr++ = cmd_value;
1562
1563         /* dw17 */
1564         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1565                      (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1566         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1567                      (vp9_state->frame_header.bit_offset_lf_level << 16);
1568
1569         /* dw19 */
1570         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1571                       non_first_pass << 16);
1572         /* dw20 */
1573         *cmd_ptr++ = (1 << 31) | (256);
1574
1575         /* dw21 */
1576         *cmd_ptr++ = (0 << 31) | 1;
1577
1578         /* dw22-dw24. Frame_delta_qindex_range */
1579         *cmd_ptr++ = 0;
1580         *cmd_ptr++ = 0;
1581         *cmd_ptr++ = 0;
1582
1583         /* dw25-26. frame_delta_lf_range */
1584         *cmd_ptr++ = 0;
1585         *cmd_ptr++ = 0;
1586
1587         /* dw27. frame_delta_lf_min */
1588         *cmd_ptr++ = 0;
1589
1590         /* dw28..30 */
1591         *cmd_ptr++ = 0;
1592         *cmd_ptr++ = 0;
1593         *cmd_ptr++ = 0;
1594
1595         /* dw31 */
1596         *cmd_ptr++ = (0 << 30) | 1;
1597         /* dw32 */
1598         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1599
1600         *cmd_ptr++ = 0;
1601         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1602     }
1603
1604     i965_unmap_gpe_resource(gpe_resource);
1605 }
1606
1607 static void
1608 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1609                                  struct encode_state *encode_state,
1610                                  struct intel_encoder_context *encoder_context,
1611                                  struct i965_gpe_context *brc_gpe_context,
1612                                  struct i965_gpe_context *mbenc_gpe_context)
1613 {
1614     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1615
1616     /* 0. BRC history buffer */
1617     i965_add_buffer_gpe_surface(ctx,
1618                                 brc_gpe_context,
1619                                 &vme_context->res_brc_history_buffer,
1620                                 0,
1621                                 vme_context->res_brc_history_buffer.size,
1622                                 0,
1623                                 VP9_BTI_BRC_HISTORY_G9);
1624
1625     /* 1. Constant data buffer */
1626     i965_add_buffer_gpe_surface(ctx,
1627                                 brc_gpe_context,
1628                                 &vme_context->res_brc_const_data_buffer,
1629                                 0,
1630                                 vme_context->res_brc_const_data_buffer.size,
1631                                 0,
1632                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1633
1634     /* 2. Distortion 2D surface buffer */
1635     i965_add_buffer_2d_gpe_surface(ctx,
1636                                    brc_gpe_context,
1637                                    &vme_context->s4x_memv_distortion_buffer,
1638                                    1,
1639                                    I965_SURFACEFORMAT_R8_UNORM,
1640                                    VP9_BTI_BRC_DISTORTION_G9);
1641
1642     /* 3. pak buffer */
1643     i965_add_buffer_gpe_surface(ctx,
1644                                 brc_gpe_context,
1645                                 &vme_context->res_brc_mmdk_pak_buffer,
1646                                 0,
1647                                 vme_context->res_brc_mmdk_pak_buffer.size,
1648                                 0,
1649                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1650     /* 4. Mbenc curbe input buffer */
1651     gen9_add_dri_buffer_gpe_surface(ctx,
1652                                     brc_gpe_context,
1653                                     mbenc_gpe_context->curbe.bo,
1654                                     0,
1655                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1656                                     mbenc_gpe_context->curbe.offset,
1657                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1658     /* 5. Mbenc curbe output buffer */
1659     gen9_add_dri_buffer_gpe_surface(ctx,
1660                                     brc_gpe_context,
1661                                     mbenc_gpe_context->curbe.bo,
1662                                     0,
1663                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1664                                     mbenc_gpe_context->curbe.offset,
1665                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1666
1667     /* 6. BRC_PIC_STATE read buffer */
1668     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1669                                 &vme_context->res_pic_state_brc_read_buffer,
1670                                 0,
1671                                 vme_context->res_pic_state_brc_read_buffer.size,
1672                                 0,
1673                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1674
1675     /* 7. BRC_PIC_STATE write buffer */
1676     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1677                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1678                                 0,
1679                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1680                                 0,
1681                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1682
1683     /* 8. SEGMENT_STATE read buffer */
1684     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1685                                 &vme_context->res_seg_state_brc_read_buffer,
1686                                 0,
1687                                 vme_context->res_seg_state_brc_read_buffer.size,
1688                                 0,
1689                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1690
1691     /* 9. SEGMENT_STATE write buffer */
1692     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1693                                 &vme_context->res_seg_state_brc_write_buffer,
1694                                 0,
1695                                 vme_context->res_seg_state_brc_write_buffer.size,
1696                                 0,
1697                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1698
1699     /* 10. Bitstream size buffer */
1700     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1701                                 &vme_context->res_brc_bitstream_size_buffer,
1702                                 0,
1703                                 vme_context->res_brc_bitstream_size_buffer.size,
1704                                 0,
1705                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1706
1707     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1708                                 &vme_context->res_brc_hfw_data_buffer,
1709                                 0,
1710                                 vme_context->res_brc_hfw_data_buffer.size,
1711                                 0,
1712                                 VP9_BTI_BRC_HFW_DATA_G9);
1713
1714     return;
1715 }
1716
1717 static VAStatus
1718 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1719                            struct encode_state *encode_state,
1720                            struct intel_encoder_context *encoder_context)
1721 {
1722     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1723     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1724     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1725     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1726     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1727     int mbenc_function;
1728     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1729     VAEncPictureParameterBufferVP9 *pic_param;
1730     struct gen9_vp9_state *vp9_state;
1731     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1732     struct gpe_media_object_parameter media_object_param;
1733
1734     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1735     if (!vp9_state || !vp9_state->pic_param)
1736         return VA_STATUS_ERROR_INVALID_PARAMETER;
1737
1738     pic_param = vp9_state->pic_param;
1739     // Setup VP9 MbEnc Curbe
1740     if (vp9_state->picture_coding_type) {
1741         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1742         mbenc_index = VP9_MBENC_IDX_INTER;
1743     } else {
1744         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1745         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1746     }
1747
1748     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1749
1750     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1751
1752     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1753     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1754     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1755     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1756     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1757     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1758     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1759     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1760     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1761     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1762     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1763     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1764     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1765     mbenc_curbe_param.media_state_type       = mbenc_function;
1766
1767     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1768                                      mbenc_gpe_context,
1769                                      encoder_context,
1770                                      &mbenc_curbe_param);
1771
1772     vp9_state->mbenc_curbe_set_in_brc_update = true;
1773
1774     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1775
1776     gen8_gpe_context_init(ctx, brc_gpe_context);
1777     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1778
1779     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1780
1781     // Setup BRC Update Curbe
1782     brc_update_curbe_param.media_state_type       = media_function;
1783     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1784     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1785     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1786     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1787     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1788     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1789     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1790     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1791     brc_update_curbe_param.b_used_ref             = 1;
1792     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1793     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1794     brc_update_curbe_param.mbbrc_enabled          = 0;
1795     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1796     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1797
1798     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1799         &vp9_state->brc_init_current_target_buf_full_in_bits;
1800     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1801         &vp9_state->brc_init_reset_buf_size_in_bits;
1802     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1803         &vp9_state->brc_init_reset_input_bits_per_frame;
1804
1805     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1806                                    brc_gpe_context,
1807                                    encoder_context,
1808                                    &brc_update_curbe_param);
1809
1810
1811     // Check if the constant data surface is present
1812     if (vp9_state->brc_constant_buffer_supported) {
1813         char *brc_const_buffer;
1814         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1815
1816         if (!brc_const_buffer)
1817             return VA_STATUS_ERROR_OPERATION_FAILED;
1818
1819         if (vp9_state->picture_coding_type)
1820             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1821                    sizeof(vp9_brc_const_data_p_g9));
1822         else
1823             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1824                    sizeof(vp9_brc_const_data_i_g9));
1825
1826         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1827     }
1828
1829     if (pic_param->pic_flags.bits.segmentation_enabled) {
1830         //reallocate the vme_state->mb_segment_map_surface
1831         /* this will be added later */
1832     }
1833
1834     {
1835         pic_param->filter_level = 0;
1836         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1837         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1838                                                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1839     }
1840
1841     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1842                                      encoder_context,
1843                                      brc_gpe_context,
1844                                      mbenc_gpe_context);
1845
1846     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1847     memset(&media_object_param, 0, sizeof(media_object_param));
1848     gen9_run_kernel_media_object(ctx, encoder_context,
1849                                  brc_gpe_context,
1850                                  media_function,
1851                                  &media_object_param);
1852     return VA_STATUS_SUCCESS;
1853 }
1854
1855 static
1856 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1857                            struct encode_state *encode_state,
1858                            struct i965_gpe_context *gpe_context,
1859                            struct intel_encoder_context *encoder_context,
1860                            struct gen9_vp9_me_curbe_param *param)
1861 {
1862     vp9_me_curbe_data        *me_cmd;
1863     int enc_media_state;
1864     int                                       me_mode;
1865     unsigned int                                       width, height;
1866     uint32_t                                  l0_ref_frames;
1867     uint32_t                                  scale_factor;
1868
1869     if (param->b16xme_enabled) {
1870         if (param->use_16x_me)
1871             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1872         else
1873             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1874     } else {
1875         me_mode = VP9_ENC_ME4X_ONLY;
1876     }
1877
1878     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1879         scale_factor = 16;
1880     else
1881         scale_factor = 4;
1882
1883     if (param->use_16x_me)
1884         enc_media_state = VP9_MEDIA_STATE_16X_ME;
1885     else
1886         enc_media_state = VP9_MEDIA_STATE_4X_ME;
1887
1888     me_cmd = i965_gpe_context_map_curbe(gpe_context);
1889
1890     if (!me_cmd)
1891         return;
1892
1893     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
1894
1895     me_cmd->dw1.max_num_mvs           = 0x10;
1896     me_cmd->dw1.bi_weight             = 0x00;
1897
1898     me_cmd->dw2.max_num_su            = 0x39;
1899     me_cmd->dw2.max_len_sp            = 0x39;
1900
1901     me_cmd->dw3.sub_mb_part_mask       = 0x77;
1902     me_cmd->dw3.inter_sad             = 0x00;
1903     me_cmd->dw3.intra_sad            = 0x00;
1904     me_cmd->dw3.bme_disable_fbr      = 0x01;
1905     me_cmd->dw3.sub_pel_mode         = 0x03;
1906
1907     width = param->frame_width / scale_factor;
1908     height = param->frame_height / scale_factor;
1909
1910     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
1911     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
1912
1913     me_cmd->dw5.ref_width            = 0x30;
1914     me_cmd->dw5.ref_height           = 0x28;
1915
1916     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1917         me_cmd->dw6.write_distortions = 0x01;
1918
1919     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1920     me_cmd->dw6.super_combine_dist    = 0x5;
1921     me_cmd->dw6.max_vmvr              = 0x7fc;
1922
1923     l0_ref_frames = (param->ref_frame_flag & 0x01) +
1924                     !!(param->ref_frame_flag & 0x02) +
1925                     !!(param->ref_frame_flag & 0x04);
1926     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1927     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
1928
1929     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1930     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1931
1932     me_cmd->dw15.mv_shift_factor        = 0x02;
1933
1934     {
1935         memcpy((void *)((char *)me_cmd + 64),
1936                vp9_diamond_ime_search_path_delta,
1937                sizeof(vp9_diamond_ime_search_path_delta));
1938     }
1939
1940
1941     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
1942     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1943     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
1944     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1945     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
1946     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
1947
1948     i965_gpe_context_unmap_curbe(gpe_context);
1949 }
1950
1951 static void
1952 gen9_vp9_send_me_surface(VADriverContextP ctx,
1953                          struct encode_state *encode_state,
1954                          struct i965_gpe_context *gpe_context,
1955                          struct intel_encoder_context *encoder_context,
1956                          struct gen9_vp9_me_surface_param *param)
1957 {
1958     struct i965_driver_data *i965 = i965_driver_data(ctx);
1959     struct object_surface *obj_surface;
1960     struct gen9_surface_vp9 *vp9_priv_surface;
1961     struct object_surface *input_surface;
1962     struct i965_gpe_resource *gpe_resource;
1963     int ref_bti;
1964
1965     obj_surface = SURFACE(param->curr_pic);
1966
1967     if (!obj_surface || !obj_surface->private_data)
1968         return;
1969
1970     vp9_priv_surface = obj_surface->private_data;
1971     if (param->use_16x_me) {
1972         gpe_resource = param->pres_16x_memv_data_buffer;
1973     } else {
1974         gpe_resource = param->pres_4x_memv_data_buffer;
1975     }
1976
1977     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1978                                    gpe_resource,
1979                                    1,
1980                                    I965_SURFACEFORMAT_R8_UNORM,
1981                                    VP9_BTI_ME_MV_DATA_SURFACE);
1982
1983     if (param->b16xme_enabled) {
1984         gpe_resource = param->pres_16x_memv_data_buffer;
1985         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1986                                        gpe_resource,
1987                                        1,
1988                                        I965_SURFACEFORMAT_R8_UNORM,
1989                                        VP9_BTI_16XME_MV_DATA_SURFACE);
1990     }
1991
1992     if (!param->use_16x_me) {
1993         gpe_resource = param->pres_me_brc_distortion_buffer;
1994
1995         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1996                                        gpe_resource,
1997                                        1,
1998                                        I965_SURFACEFORMAT_R8_UNORM,
1999                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2000
2001         gpe_resource = param->pres_me_distortion_buffer;
2002
2003         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2004                                        gpe_resource,
2005                                        1,
2006                                        I965_SURFACEFORMAT_R8_UNORM,
2007                                        VP9_BTI_ME_DISTORTION_SURFACE);
2008     }
2009
2010     if (param->use_16x_me)
2011         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2012     else
2013         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2014
2015     i965_add_adv_gpe_surface(ctx, gpe_context,
2016                              input_surface,
2017                              VP9_BTI_ME_CURR_PIC_L0);
2018
2019     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2020
2021
2022     if (param->last_ref_pic) {
2023         obj_surface = param->last_ref_pic;
2024         vp9_priv_surface = obj_surface->private_data;
2025
2026         if (param->use_16x_me)
2027             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2028         else
2029             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2030
2031         if (param->dys_enabled &&
2032             ((vp9_priv_surface->frame_width != param->frame_width) ||
2033              (vp9_priv_surface->frame_height != param->frame_height))) {
2034             if (param->use_16x_me)
2035                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2036             else
2037                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2038         }
2039         i965_add_adv_gpe_surface(ctx, gpe_context,
2040                                  input_surface,
2041                                  ref_bti);
2042         i965_add_adv_gpe_surface(ctx, gpe_context,
2043                                  input_surface,
2044                                  ref_bti + 1);
2045         ref_bti += 2;
2046     }
2047
2048     if (param->golden_ref_pic) {
2049         obj_surface = param->golden_ref_pic;
2050         vp9_priv_surface = obj_surface->private_data;
2051
2052         if (param->use_16x_me)
2053             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2054         else
2055             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2056
2057         if (param->dys_enabled &&
2058             ((vp9_priv_surface->frame_width != param->frame_width) ||
2059              (vp9_priv_surface->frame_height != param->frame_height))) {
2060             if (param->use_16x_me)
2061                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2062             else
2063                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2064         }
2065
2066         i965_add_adv_gpe_surface(ctx, gpe_context,
2067                                  input_surface,
2068                                  ref_bti);
2069         i965_add_adv_gpe_surface(ctx, gpe_context,
2070                                  input_surface,
2071                                  ref_bti + 1);
2072         ref_bti += 2;
2073     }
2074
2075     if (param->alt_ref_pic) {
2076         obj_surface = param->alt_ref_pic;
2077         vp9_priv_surface = obj_surface->private_data;
2078
2079         if (param->use_16x_me)
2080             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2081         else
2082             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2083
2084         if (param->dys_enabled &&
2085             ((vp9_priv_surface->frame_width != param->frame_width) ||
2086              (vp9_priv_surface->frame_height != param->frame_height))) {
2087             if (param->use_16x_me)
2088                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2089             else
2090                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2091         }
2092         i965_add_adv_gpe_surface(ctx, gpe_context,
2093                                  input_surface,
2094                                  ref_bti);
2095         i965_add_adv_gpe_surface(ctx, gpe_context,
2096                                  input_surface,
2097                                  ref_bti + 1);
2098         ref_bti += 2;
2099     }
2100
2101     return;
2102 }
2103
2104 static
2105 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2106                               struct encode_state *encode_state,
2107                               struct intel_encoder_context *encoder_context,
2108                               struct i965_gpe_context *gpe_context,
2109                               int use_16x_me)
2110 {
2111     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2112     struct gen9_vp9_me_surface_param  me_surface_param;
2113     struct gen9_vp9_state *vp9_state;
2114
2115     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2116
2117     /* sScaled4xSurface surface */
2118     memset(&me_surface_param, 0, sizeof(me_surface_param));
2119     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2120     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2121     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2122     me_surface_param.curr_pic = vp9_state->curr_frame;
2123     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2124     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2125     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2126     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2127
2128     if (use_16x_me) {
2129         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2130         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2131     } else {
2132         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2133         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2134     }
2135     me_surface_param.frame_width  = vp9_state->frame_width;
2136     me_surface_param.frame_height  = vp9_state->frame_height;
2137
2138     me_surface_param.use_16x_me = use_16x_me;
2139     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2140     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2141
2142     vme_context->pfn_send_me_surface(ctx, encode_state,
2143                                      gpe_context,
2144                                      encoder_context,
2145                                      &me_surface_param);
2146     return;
2147 }
2148
2149 static VAStatus
2150 gen9_vp9_me_kernel(VADriverContextP ctx,
2151                    struct encode_state *encode_state,
2152                    struct intel_encoder_context *encoder_context,
2153                    int use_16x_me)
2154 {
2155     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2156     struct i965_gpe_context *gpe_context;
2157     int media_function;
2158     struct gen9_vp9_me_curbe_param me_curbe_param;
2159     struct gen9_vp9_state *vp9_state;
2160     struct gpe_media_object_walker_parameter media_object_walker_param;
2161     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2162
2163     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2164     if (!vp9_state || !vp9_state->pic_param)
2165         return VA_STATUS_ERROR_INVALID_PARAMETER;
2166
2167     if (use_16x_me)
2168         media_function = VP9_MEDIA_STATE_16X_ME;
2169     else
2170         media_function = VP9_MEDIA_STATE_4X_ME;
2171
2172     gpe_context = &(vme_context->me_context.gpe_context);
2173
2174     gen8_gpe_context_init(ctx, gpe_context);
2175     gen9_gpe_reset_binding_table(ctx, gpe_context);
2176
2177     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2178     me_curbe_param.ppic_param = vp9_state->pic_param;
2179     me_curbe_param.pseq_param = vp9_state->seq_param;
2180     me_curbe_param.frame_width = vp9_state->frame_width;
2181     me_curbe_param.frame_height = vp9_state->frame_height;
2182     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2183     me_curbe_param.use_16x_me = use_16x_me;
2184     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2185     vme_context->pfn_set_curbe_me(ctx, encode_state,
2186                                   gpe_context,
2187                                   encoder_context,
2188                                   &me_curbe_param);
2189
2190     gen9_me_add_surfaces_vp9(ctx, encode_state,
2191                              encoder_context,
2192                              gpe_context,
2193                              use_16x_me);
2194
2195     gen8_gpe_setup_interface_data(ctx, gpe_context);
2196
2197     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2198     if (use_16x_me) {
2199         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2200         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2201     } else {
2202         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2203         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2204     }
2205     kernel_walker_param.no_dependency = 1;
2206
2207     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2208
2209     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2210                                         gpe_context,
2211                                         media_function,
2212                                         &media_object_walker_param);
2213
2214     return VA_STATUS_SUCCESS;
2215 }
2216
2217 static void
2218 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2219                               struct encode_state *encode_state,
2220                               struct i965_gpe_context *gpe_context,
2221                               struct intel_encoder_context *encoder_context,
2222                               struct gen9_vp9_scaling_curbe_param *curbe_param)
2223 {
2224     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2225
2226     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2227
2228     if (!curbe_cmd)
2229         return;
2230
2231     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2232
2233     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2234     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2235
2236     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2237     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2238
2239
2240     curbe_cmd->dw6.enable_mb_variance_output = 0;
2241     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2242     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2243
2244     if (curbe_param->mb_variance_output_enabled ||
2245         curbe_param->mb_pixel_average_output_enabled) {
2246         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2247     }
2248
2249     i965_gpe_context_unmap_curbe(gpe_context);
2250     return;
2251 }
2252
2253 static void
2254 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2255                               struct encode_state *encode_state,
2256                               struct i965_gpe_context *gpe_context,
2257                               struct intel_encoder_context *encoder_context,
2258                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2259 {
2260     vp9_bti_scaling_offset *scaling_bti;
2261     unsigned int surface_format;
2262
2263     scaling_bti = scaling_surface_param->p_scaling_bti;
2264
2265     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2266         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2267     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2268         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2269     else
2270         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2271
2272     i965_add_2d_gpe_surface(ctx, gpe_context,
2273                             scaling_surface_param->input_surface,
2274                             0, 1, surface_format,
2275                             scaling_bti->scaling_frame_src_y);
2276
2277     i965_add_2d_gpe_surface(ctx, gpe_context,
2278                             scaling_surface_param->output_surface,
2279                             0, 1, surface_format,
2280                             scaling_bti->scaling_frame_dst_y);
2281
2282
2283     return;
2284 }
2285
2286 static VAStatus
2287 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2288                         struct encode_state *encode_state,
2289                         struct intel_encoder_context *encoder_context,
2290                         int use_16x_scaling)
2291 {
2292     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2293     struct i965_gpe_context *gpe_context;
2294     int media_function;
2295     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2296     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2297     struct gen9_vp9_state *vp9_state;
2298     VAEncPictureParameterBufferVP9  *pic_param;
2299     struct gpe_media_object_walker_parameter media_object_walker_param;
2300     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2301     struct object_surface *obj_surface;
2302     struct object_surface *input_surface, *output_surface;
2303     struct gen9_surface_vp9 *vp9_priv_surface;
2304     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2305     unsigned int input_frame_width, input_frame_height;
2306     unsigned int output_frame_width, output_frame_height;
2307
2308     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2309     if (!vp9_state || !vp9_state->pic_param)
2310         return VA_STATUS_ERROR_INVALID_PARAMETER;
2311
2312     pic_param = vp9_state->pic_param;
2313
2314     if (use_16x_scaling)
2315         media_function = VP9_MEDIA_STATE_16X_SCALING;
2316     else
2317         media_function = VP9_MEDIA_STATE_4X_SCALING;
2318
2319     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2320
2321     gen8_gpe_context_init(ctx, gpe_context);
2322     gen9_gpe_reset_binding_table(ctx, gpe_context);
2323
2324     obj_surface = encode_state->reconstructed_object;
2325     vp9_priv_surface = obj_surface->private_data;
2326
2327     if (use_16x_scaling) {
2328         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2329         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2330
2331         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2332         input_frame_width           = vp9_state->frame_width_4x;
2333         input_frame_height          = vp9_state->frame_height_4x;
2334
2335         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2336         output_frame_width          = vp9_state->frame_width_16x;
2337         output_frame_height         = vp9_state->frame_height_16x;
2338     } else {
2339         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2340         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2341
2342         if (vp9_state->dys_in_use &&
2343             ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2344              (pic_param->frame_height_src != pic_param->frame_height_dst)))
2345             input_surface               = vp9_priv_surface->dys_surface_obj;
2346         else
2347             input_surface               = encode_state->input_yuv_object;
2348
2349         input_frame_width           = vp9_state->frame_width;
2350         input_frame_height          = vp9_state->frame_height;
2351
2352         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2353         output_frame_width          = vp9_state->frame_width_4x;
2354         output_frame_height         = vp9_state->frame_height_4x;
2355     }
2356
2357     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2358
2359     scaling_curbe_param.input_picture_width  = input_frame_width;
2360     scaling_curbe_param.input_picture_height = input_frame_height;
2361
2362     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2363     scaling_curbe_param.use_32x_scaling = 0;
2364
2365     if (use_16x_scaling)
2366         scaling_curbe_param.mb_variance_output_enabled = 0;
2367     else
2368         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2369
2370     scaling_curbe_param.blk8x8_stat_enabled = 0;
2371
2372     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2373                                        gpe_context,
2374                                        encoder_context,
2375                                        &scaling_curbe_param);
2376
2377     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2378     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2379     scaling_surface_param.input_surface                      = input_surface;
2380     scaling_surface_param.input_frame_width                  = input_frame_width;
2381     scaling_surface_param.input_frame_height                 = input_frame_height;
2382
2383     scaling_surface_param.output_surface                     = output_surface;
2384     scaling_surface_param.output_frame_width                 = output_frame_width;
2385     scaling_surface_param.output_frame_height                = output_frame_height;
2386     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2387     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2388
2389     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2390                                           gpe_context,
2391                                           encoder_context,
2392                                           &scaling_surface_param);
2393
2394     gen8_gpe_setup_interface_data(ctx, gpe_context);
2395
2396     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2397     /* the scaling is based on 8x8 blk level */
2398     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2399     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2400     kernel_walker_param.no_dependency = 1;
2401
2402     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2403
2404     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2405                                         gpe_context,
2406                                         media_function,
2407                                         &media_object_walker_param);
2408
2409     return VA_STATUS_SUCCESS;
2410 }
2411
2412 static void
2413 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2414 {
2415     struct gen9_sampler_8x8_avs                *sampler_cmd;
2416
2417     if (!gpe_context)
2418         return;
2419
2420     dri_bo_map(gpe_context->sampler.bo, 1);
2421
2422     if (!gpe_context->sampler.bo->virtual)
2423         return;
2424
2425     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2426                   (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2427
2428     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2429
2430     sampler_cmd->dw0.r3c_coefficient                      = 15;
2431     sampler_cmd->dw0.r3x_coefficient                      = 6;
2432     sampler_cmd->dw0.strong_edge_threshold                = 8;
2433     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2434     sampler_cmd->dw0.gain_factor                          = 32;
2435
2436     sampler_cmd->dw2.r5c_coefficient                     = 3;
2437     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2438     sampler_cmd->dw2.r5x_coefficient                     = 9;
2439     sampler_cmd->dw2.strong_edge_weight                  = 6;
2440     sampler_cmd->dw2.regular_weight                      = 3;
2441     sampler_cmd->dw2.non_edge_weight                     = 2;
2442     sampler_cmd->dw2.global_noise_estimation             = 255;
2443
2444     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2445     sampler_cmd->dw3.cos_alpha                           = 79;
2446     sampler_cmd->dw3.sin_alpha                           = 101;
2447
2448     sampler_cmd->dw5.diamond_du                           = 0;
2449     sampler_cmd->dw5.hs_margin                            = 3;
2450     sampler_cmd->dw5.diamond_alpha                        = 100;
2451
2452     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2453
2454     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2455
2456     sampler_cmd->dw10.y_slope2                            = 24;
2457     sampler_cmd->dw10.s0l                                 = 1792;
2458
2459     sampler_cmd->dw12.y_slope1                            = 24;
2460
2461     sampler_cmd->dw14.s0u                                = 256;
2462
2463     sampler_cmd->dw15.s2u                                = 1792;
2464     sampler_cmd->dw15.s1u                                = 0;
2465
2466     memcpy(sampler_cmd->coefficients,
2467            &gen9_vp9_avs_coeffs[0],
2468            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2469
2470     sampler_cmd->dw152.default_sharpness_level     = 255;
2471     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2472     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2473     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2474     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2475
2476     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2477     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2478     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2479
2480     memcpy(sampler_cmd->extra_coefficients,
2481            &gen9_vp9_avs_coeffs[17 * 8],
2482            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2483
2484     dri_bo_unmap(gpe_context->sampler.bo);
2485 }
2486
2487 static void
2488 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2489                        struct encode_state *encode_state,
2490                        struct i965_gpe_context *gpe_context,
2491                        struct intel_encoder_context *encoder_context,
2492                        struct gen9_vp9_dys_curbe_param *curbe_param)
2493 {
2494     vp9_dys_curbe_data  *curbe_cmd;
2495
2496     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2497
2498     if (!curbe_cmd)
2499         return;
2500
2501     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2502
2503     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2504     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2505
2506     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2507     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2508
2509     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2510     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2511
2512     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2513     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2514     curbe_cmd->dw18.avs_sample_idx            = 0;
2515
2516     i965_gpe_context_unmap_curbe(gpe_context);
2517 }
2518
2519 static void
2520 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2521                           struct encode_state *encode_state,
2522                           struct i965_gpe_context *gpe_context,
2523                           struct intel_encoder_context *encoder_context,
2524                           struct gen9_vp9_dys_surface_param *surface_param)
2525 {
2526
2527     if (surface_param->input_frame)
2528         i965_add_adv_gpe_surface(ctx,
2529                                  gpe_context,
2530                                  surface_param->input_frame,
2531                                  VP9_BTI_DYS_INPUT_NV12);
2532
2533     if (surface_param->output_frame) {
2534         i965_add_2d_gpe_surface(ctx,
2535                                 gpe_context,
2536                                 surface_param->output_frame,
2537                                 0,
2538                                 1,
2539                                 I965_SURFACEFORMAT_R8_UNORM,
2540                                 VP9_BTI_DYS_OUTPUT_Y);
2541
2542         i965_add_2d_gpe_surface(ctx,
2543                                 gpe_context,
2544                                 surface_param->output_frame,
2545                                 1,
2546                                 1,
2547                                 I965_SURFACEFORMAT_R16_UINT,
2548                                 VP9_BTI_DYS_OUTPUT_UV);
2549     }
2550
2551     return;
2552 }
2553
2554 static VAStatus
2555 gen9_vp9_dys_kernel(VADriverContextP ctx,
2556                     struct encode_state *encode_state,
2557                     struct intel_encoder_context *encoder_context,
2558                     gen9_vp9_dys_kernel_param *dys_kernel_param)
2559 {
2560     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2561     struct i965_gpe_context *gpe_context;
2562     int media_function;
2563     struct gen9_vp9_dys_curbe_param                 curbe_param;
2564     struct gen9_vp9_dys_surface_param               surface_param;
2565     struct gpe_media_object_walker_parameter        media_object_walker_param;
2566     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
2567     unsigned int                                    resolution_x, resolution_y;
2568
2569     media_function = VP9_MEDIA_STATE_DYS;
2570     gpe_context = &vme_context->dys_context.gpe_context;
2571
2572     //gen8_gpe_context_init(ctx, gpe_context);
2573     gen9_gpe_reset_binding_table(ctx, gpe_context);
2574
2575     /* sampler state is configured only when initializing the GPE context */
2576
2577     memset(&curbe_param, 0, sizeof(curbe_param));
2578     curbe_param.input_width   = dys_kernel_param->input_width;
2579     curbe_param.input_height  = dys_kernel_param->input_height;
2580     curbe_param.output_width = dys_kernel_param->output_width;
2581     curbe_param.output_height = dys_kernel_param->output_height;
2582     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2583                                    gpe_context,
2584                                    encoder_context,
2585                                    &curbe_param);
2586
2587     // Add surface states
2588     memset(&surface_param, 0, sizeof(surface_param));
2589     surface_param.input_frame = dys_kernel_param->input_surface;
2590     surface_param.output_frame = dys_kernel_param->output_surface;
2591     surface_param.vert_line_stride = 0;
2592     surface_param.vert_line_stride_offset = 0;
2593
2594     vme_context->pfn_send_dys_surface(ctx,
2595                                       encode_state,
2596                                       gpe_context,
2597                                       encoder_context,
2598                                       &surface_param);
2599
2600     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2601     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2602
2603     gen8_gpe_setup_interface_data(ctx, gpe_context);
2604
2605     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2606     kernel_walker_param.resolution_x = resolution_x;
2607     kernel_walker_param.resolution_y = resolution_y;
2608     kernel_walker_param.no_dependency = 1;
2609
2610     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2611
2612     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2613                                         gpe_context,
2614                                         media_function,
2615                                         &media_object_walker_param);
2616
2617     return VA_STATUS_SUCCESS;
2618 }
2619
2620 static VAStatus
2621 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2622                            struct encode_state *encode_state,
2623                            struct intel_encoder_context *encoder_context)
2624 {
2625     struct gen9_vp9_state *vp9_state;
2626     VAEncPictureParameterBufferVP9  *pic_param;
2627     gen9_vp9_dys_kernel_param dys_kernel_param;
2628     struct object_surface *obj_surface;
2629     struct object_surface *input_surface, *output_surface;
2630     struct gen9_surface_vp9 *vp9_priv_surface;
2631
2632     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2633
2634     if (!vp9_state || !vp9_state->pic_param)
2635         return VA_STATUS_ERROR_INVALID_PARAMETER;
2636
2637     pic_param = vp9_state->pic_param;
2638
2639     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2640         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2641         input_surface = encode_state->input_yuv_object;
2642         obj_surface = encode_state->reconstructed_object;
2643         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2644         output_surface = vp9_priv_surface->dys_surface_obj;
2645
2646         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2647         dys_kernel_param.input_width = pic_param->frame_width_src;
2648         dys_kernel_param.input_height = pic_param->frame_height_src;
2649         dys_kernel_param.input_surface = input_surface;
2650         dys_kernel_param.output_width = pic_param->frame_width_dst;
2651         dys_kernel_param.output_height = pic_param->frame_height_dst;
2652         dys_kernel_param.output_surface = output_surface;
2653         gen9_vp9_dys_kernel(ctx, encode_state,
2654                             encoder_context,
2655                             &dys_kernel_param);
2656     }
2657
2658     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2659         vp9_state->last_ref_obj) {
2660         obj_surface = vp9_state->last_ref_obj;
2661         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2662
2663         input_surface = obj_surface;
2664         output_surface = vp9_priv_surface->dys_surface_obj;
2665
2666         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2667         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2668         dys_kernel_param.input_surface = input_surface;
2669
2670         dys_kernel_param.output_width = pic_param->frame_width_dst;
2671         dys_kernel_param.output_height = pic_param->frame_height_dst;
2672         dys_kernel_param.output_surface = output_surface;
2673
2674         gen9_vp9_dys_kernel(ctx, encode_state,
2675                             encoder_context,
2676                             &dys_kernel_param);
2677
2678         if (vp9_state->hme_enabled) {
2679             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2680             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2681             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2682
2683             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2684             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2685             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2686
2687             gen9_vp9_dys_kernel(ctx, encode_state,
2688                                 encoder_context,
2689                                 &dys_kernel_param);
2690
2691             /* Does it really need to do the 16x HME if the
2692              * resolution is different?
2693              * Maybe it should be restricted
2694              */
2695             if (vp9_state->b16xme_enabled) {
2696                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2697                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2698                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2699
2700                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2701                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2702                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2703
2704                 gen9_vp9_dys_kernel(ctx, encode_state,
2705                                     encoder_context,
2706                                     &dys_kernel_param);
2707             }
2708         }
2709     }
2710
2711     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2712         vp9_state->golden_ref_obj) {
2713         obj_surface = vp9_state->golden_ref_obj;
2714         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2715
2716         input_surface = obj_surface;
2717         output_surface = vp9_priv_surface->dys_surface_obj;
2718
2719         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2720         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2721         dys_kernel_param.input_surface = input_surface;
2722
2723         dys_kernel_param.output_width = pic_param->frame_width_dst;
2724         dys_kernel_param.output_height = pic_param->frame_height_dst;
2725         dys_kernel_param.output_surface = output_surface;
2726
2727         gen9_vp9_dys_kernel(ctx, encode_state,
2728                             encoder_context,
2729                             &dys_kernel_param);
2730
2731         if (vp9_state->hme_enabled) {
2732             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2733             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2734             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2735
2736             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2737             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2738             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2739
2740             gen9_vp9_dys_kernel(ctx, encode_state,
2741                                 encoder_context,
2742                                 &dys_kernel_param);
2743
2744             /* Does it really need to do the 16x HME if the
2745              * resolution is different?
2746              * Maybe it should be restricted
2747              */
2748             if (vp9_state->b16xme_enabled) {
2749                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2750                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2751                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2752
2753                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2754                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2755                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2756
2757                 gen9_vp9_dys_kernel(ctx, encode_state,
2758                                     encoder_context,
2759                                     &dys_kernel_param);
2760             }
2761         }
2762     }
2763
2764     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2765         vp9_state->alt_ref_obj) {
2766         obj_surface = vp9_state->alt_ref_obj;
2767         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2768
2769         input_surface = obj_surface;
2770         output_surface = vp9_priv_surface->dys_surface_obj;
2771
2772         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2773         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2774         dys_kernel_param.input_surface = input_surface;
2775
2776         dys_kernel_param.output_width = pic_param->frame_width_dst;
2777         dys_kernel_param.output_height = pic_param->frame_height_dst;
2778         dys_kernel_param.output_surface = output_surface;
2779
2780         gen9_vp9_dys_kernel(ctx, encode_state,
2781                             encoder_context,
2782                             &dys_kernel_param);
2783
2784         if (vp9_state->hme_enabled) {
2785             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2786             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2787             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2788
2789             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2790             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2791             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2792
2793             gen9_vp9_dys_kernel(ctx, encode_state,
2794                                 encoder_context,
2795                                 &dys_kernel_param);
2796
2797             /* Does it really need to do the 16x HME if the
2798              * resolution is different?
2799              * Maybe it should be restricted
2800              */
2801             if (vp9_state->b16xme_enabled) {
2802                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2803                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2804                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2805
2806                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2807                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2808                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2809
2810                 gen9_vp9_dys_kernel(ctx, encode_state,
2811                                     encoder_context,
2812                                     &dys_kernel_param);
2813             }
2814         }
2815     }
2816
2817     return VA_STATUS_SUCCESS;
2818 }
2819
2820 static void
2821 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2822                          struct encode_state *encode_state,
2823                          struct i965_gpe_context *gpe_context,
2824                          struct intel_encoder_context *encoder_context,
2825                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
2826 {
2827     struct gen9_vp9_state *vp9_state;
2828     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2829     vp9_mbenc_curbe_data  *curbe_cmd;
2830     VAEncPictureParameterBufferVP9  *pic_param;
2831     int i, segment_count;
2832     int seg_qindex;
2833     struct object_surface *obj_surface;
2834     struct gen9_surface_vp9 *vp9_priv_surface;
2835
2836     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2837
2838     if (!vp9_state || !vp9_state->pic_param)
2839         return;
2840
2841     pic_param = curbe_param->ppic_param;
2842     seg_param = curbe_param->psegment_param;
2843
2844     if (!seg_param) {
2845         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2846         seg_param = &tmp_seg_param;
2847     }
2848
2849     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2850
2851     if (!curbe_cmd)
2852         return;
2853
2854     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2855
2856     if (vp9_state->dys_in_use) {
2857         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2858         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2859     } else {
2860         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2861         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2862     }
2863
2864     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2865
2866     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2867     if (pic_param->pic_flags.bits.segmentation_enabled)
2868         segment_count = 8;
2869     else
2870         segment_count = 1;
2871
2872     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2873
2874     //right now set them to normal settings
2875     if (curbe_param->picture_coding_type) {
2876         switch (vp9_state->target_usage) {
2877         case INTEL_ENC_VP9_TU_QUALITY:
2878             curbe_cmd->dw1.min_16for32_check    = 0x00;
2879             curbe_cmd->dw2.multi_pred           = 0x02;
2880             curbe_cmd->dw2.len_sp               = 0x39;
2881             curbe_cmd->dw2.search_x             = 0x30;
2882             curbe_cmd->dw2.search_y             = 0x28;
2883             curbe_cmd->dw3.min_ref_for32_check = 0x01;
2884             curbe_cmd->dw4.skip16_threshold     = 0x000A;
2885             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2886
2887             memcpy(&curbe_cmd->dw16,
2888                    vp9_diamond_ime_search_path_delta,
2889                    14 * sizeof(unsigned int));
2890             break;
2891         case INTEL_ENC_VP9_TU_PERFORMANCE:
2892             curbe_cmd->dw1.min_16for32_check    = 0x02;
2893             curbe_cmd->dw2.multi_pred           = 0x00;
2894             curbe_cmd->dw2.len_sp               = 0x10;
2895             curbe_cmd->dw2.search_x             = 0x20;
2896             curbe_cmd->dw2.search_y             = 0x20;
2897             curbe_cmd->dw3.min_ref_for32_check = 0x03;
2898             curbe_cmd->dw4.skip16_threshold     = 0x0014;
2899             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2900
2901             memcpy(&curbe_cmd->dw16,
2902                    vp9_fullspiral_ime_search_path_delta,
2903                    14 * sizeof(unsigned int));
2904
2905             break;
2906         default:  // normal settings
2907             curbe_cmd->dw1.min_16for32_check     = 0x01;
2908             curbe_cmd->dw2.multi_pred           = 0x00;
2909             curbe_cmd->dw2.len_sp               = 0x19;
2910             curbe_cmd->dw2.search_x             = 0x30;
2911             curbe_cmd->dw2.search_y             = 0x28;
2912             curbe_cmd->dw3.min_ref_for32_check = 0x02;
2913             curbe_cmd->dw4.skip16_threshold     = 0x000F;
2914             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2915
2916             memcpy(&curbe_cmd->dw16,
2917                    vp9_diamond_ime_search_path_delta,
2918                    14 * sizeof(unsigned int));
2919             break;
2920         }
2921
2922         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
2923         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
2924         // co-located predictor must be disabled when dynamic scaling is enabled
2925         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
2926     }
2927
2928     curbe_cmd->dw5.inter_round = 0;
2929     curbe_cmd->dw5.intra_round = 4;
2930     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2931
2932     for (i = 0; i < segment_count; i++) {
2933         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2934                      + seg_param->seg_data[i].segment_qindex_delta;
2935
2936         seg_qindex = CLAMP(0, 255, seg_qindex);
2937
2938         if (curbe_param->picture_coding_type)
2939             memcpy(&curbe_cmd->segments[i],
2940                    &intel_vp9_costlut_p[seg_qindex * 16],
2941                    16 * sizeof(unsigned int));
2942         else
2943             memcpy(&curbe_cmd->segments[i],
2944                    &intel_vp9_costlut_key[seg_qindex * 16],
2945                    16 * sizeof(unsigned int));
2946     }
2947
2948     if (curbe_param->picture_coding_type) {
2949         if (curbe_cmd->dw3.multi_ref_qp_check) {
2950             if (curbe_param->ref_frame_flag & 0x01) {
2951                 obj_surface = curbe_param->last_ref_obj;
2952                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2953                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2954             }
2955
2956             if (curbe_param->ref_frame_flag & 0x02) {
2957                 obj_surface = curbe_param->golden_ref_obj;
2958                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2959                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2960             }
2961
2962             if (curbe_param->ref_frame_flag & 0x04) {
2963                 obj_surface = curbe_param->alt_ref_obj;
2964                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2965                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2966             }
2967         }
2968     }
2969     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
2970     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
2971     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2972     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2973     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
2974     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
2975     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
2976     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2977     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2978     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2979     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
2980     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
2981
2982     i965_gpe_context_unmap_curbe(gpe_context);
2983     return;
2984 }
2985
2986 static void
2987 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2988                             struct encode_state *encode_state,
2989                             struct i965_gpe_context *gpe_context,
2990                             struct intel_encoder_context *encoder_context,
2991                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
2992 {
2993     struct gen9_vp9_state *vp9_state;
2994     unsigned int            res_size;
2995     unsigned int            frame_width_in_sb, frame_height_in_sb;
2996     struct object_surface   *obj_surface, *tmp_input;
2997     struct gen9_surface_vp9 *vp9_priv_surface;
2998     int media_function;
2999
3000     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3001
3002     if (!vp9_state || !vp9_state->pic_param)
3003         return;
3004
3005     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3006     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3007     media_function = mbenc_param->media_state_type;
3008
3009     switch (media_function) {
3010     case VP9_MEDIA_STATE_MBENC_I_32x32: {
3011         obj_surface = mbenc_param->curr_frame_obj;
3012
3013         i965_add_2d_gpe_surface(ctx,
3014                                 gpe_context,
3015                                 obj_surface,
3016                                 0,
3017                                 1,
3018                                 I965_SURFACEFORMAT_R8_UNORM,
3019                                 VP9_BTI_MBENC_CURR_Y_G9);
3020
3021         i965_add_2d_gpe_surface(ctx,
3022                                 gpe_context,
3023                                 obj_surface,
3024                                 1,
3025                                 1,
3026                                 I965_SURFACEFORMAT_R16_UINT,
3027                                 VP9_BTI_MBENC_CURR_UV_G9);
3028
3029
3030         if (mbenc_param->segmentation_enabled) {
3031             i965_add_buffer_2d_gpe_surface(ctx,
3032                                            gpe_context,
3033                                            mbenc_param->pres_segmentation_map,
3034                                            1,
3035                                            I965_SURFACEFORMAT_R8_UNORM,
3036                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3037
3038         }
3039
3040         res_size = 16 * mbenc_param->frame_width_in_mb *
3041                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3042         i965_add_buffer_gpe_surface(ctx,
3043                                     gpe_context,
3044                                     mbenc_param->pres_mode_decision,
3045                                     0,
3046                                     res_size / 4,
3047                                     0,
3048                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3049
3050         break;
3051     }
3052     case VP9_MEDIA_STATE_MBENC_I_16x16: {
3053         obj_surface = mbenc_param->curr_frame_obj;
3054
3055         i965_add_2d_gpe_surface(ctx,
3056                                 gpe_context,
3057                                 obj_surface,
3058                                 0,
3059                                 1,
3060                                 I965_SURFACEFORMAT_R8_UNORM,
3061                                 VP9_BTI_MBENC_CURR_Y_G9);
3062
3063         i965_add_2d_gpe_surface(ctx,
3064                                 gpe_context,
3065                                 obj_surface,
3066                                 1,
3067                                 1,
3068                                 I965_SURFACEFORMAT_R16_UINT,
3069                                 VP9_BTI_MBENC_CURR_UV_G9);
3070
3071         i965_add_adv_gpe_surface(ctx, gpe_context,
3072                                  obj_surface,
3073                                  VP9_BTI_MBENC_CURR_NV12_G9);
3074
3075         if (mbenc_param->segmentation_enabled) {
3076             i965_add_buffer_2d_gpe_surface(ctx,
3077                                            gpe_context,
3078                                            mbenc_param->pres_segmentation_map,
3079                                            1,
3080                                            I965_SURFACEFORMAT_R8_UNORM,
3081                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3082
3083         }
3084
3085         res_size = 16 * mbenc_param->frame_width_in_mb *
3086                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3087         i965_add_buffer_gpe_surface(ctx,
3088                                     gpe_context,
3089                                     mbenc_param->pres_mode_decision,
3090                                     0,
3091                                     res_size / 4,
3092                                     0,
3093                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3094
3095         res_size = 160;
3096
3097         gen9_add_dri_buffer_gpe_surface(ctx,
3098                                         gpe_context,
3099                                         mbenc_param->gpe_context_tx->curbe.bo,
3100                                         0,
3101                                         ALIGN(res_size, 64),
3102                                         mbenc_param->gpe_context_tx->curbe.offset,
3103                                         VP9_BTI_MBENC_TX_CURBE_G9);
3104
3105         break;
3106     }
3107     case VP9_MEDIA_STATE_MBENC_P: {
3108         obj_surface = mbenc_param->curr_frame_obj;
3109
3110         i965_add_2d_gpe_surface(ctx,
3111                                 gpe_context,
3112                                 obj_surface,
3113                                 0,
3114                                 1,
3115                                 I965_SURFACEFORMAT_R8_UNORM,
3116                                 VP9_BTI_MBENC_CURR_Y_G9);
3117
3118         i965_add_2d_gpe_surface(ctx, gpe_context,
3119                                 obj_surface,
3120                                 1,
3121                                 1,
3122                                 I965_SURFACEFORMAT_R16_UINT,
3123                                 VP9_BTI_MBENC_CURR_UV_G9);
3124
3125         i965_add_adv_gpe_surface(ctx, gpe_context,
3126                                  obj_surface,
3127                                  VP9_BTI_MBENC_CURR_NV12_G9);
3128
3129         if (mbenc_param->last_ref_obj) {
3130             obj_surface = mbenc_param->last_ref_obj;
3131             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3132
3133             if (vp9_state->dys_in_use &&
3134                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3135                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3136                 tmp_input = vp9_priv_surface->dys_surface_obj;
3137             else
3138                 tmp_input = obj_surface;
3139
3140             i965_add_adv_gpe_surface(ctx, gpe_context,
3141                                      tmp_input,
3142                                      VP9_BTI_MBENC_LAST_NV12_G9);
3143
3144             i965_add_adv_gpe_surface(ctx, gpe_context,
3145                                      tmp_input,
3146                                      VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3147
3148         }
3149
3150         if (mbenc_param->golden_ref_obj) {
3151             obj_surface = mbenc_param->golden_ref_obj;
3152             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3153
3154             if (vp9_state->dys_in_use &&
3155                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3156                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3157                 tmp_input = vp9_priv_surface->dys_surface_obj;
3158             else
3159                 tmp_input = obj_surface;
3160
3161             i965_add_adv_gpe_surface(ctx, gpe_context,
3162                                      tmp_input,
3163                                      VP9_BTI_MBENC_GOLD_NV12_G9);
3164
3165             i965_add_adv_gpe_surface(ctx, gpe_context,
3166                                      tmp_input,
3167                                      VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3168
3169         }
3170
3171         if (mbenc_param->alt_ref_obj) {
3172             obj_surface = mbenc_param->alt_ref_obj;
3173             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3174
3175             if (vp9_state->dys_in_use &&
3176                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3177                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3178                 tmp_input = vp9_priv_surface->dys_surface_obj;
3179             else
3180                 tmp_input = obj_surface;
3181
3182             i965_add_adv_gpe_surface(ctx, gpe_context,
3183                                      tmp_input,
3184                                      VP9_BTI_MBENC_ALTREF_NV12_G9);
3185
3186             i965_add_adv_gpe_surface(ctx, gpe_context,
3187                                      tmp_input,
3188                                      VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3189
3190         }
3191
3192         if (mbenc_param->hme_enabled) {
3193             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3194                                            mbenc_param->ps4x_memv_data_buffer,
3195                                            1,
3196                                            I965_SURFACEFORMAT_R8_UNORM,
3197                                            VP9_BTI_MBENC_HME_MV_DATA_G9);
3198
3199             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3200                                            mbenc_param->ps4x_memv_distortion_buffer,
3201                                            1,
3202                                            I965_SURFACEFORMAT_R8_UNORM,
3203                                            VP9_BTI_MBENC_HME_DISTORTION_G9);
3204         }
3205
3206         if (mbenc_param->segmentation_enabled) {
3207             i965_add_buffer_2d_gpe_surface(ctx,
3208                                            gpe_context,
3209                                            mbenc_param->pres_segmentation_map,
3210                                            1,
3211                                            I965_SURFACEFORMAT_R8_UNORM,
3212                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3213
3214         }
3215
3216         res_size = 16 * mbenc_param->frame_width_in_mb *
3217                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3218         i965_add_buffer_gpe_surface(ctx,
3219                                     gpe_context,
3220                                     mbenc_param->pres_mode_decision_prev,
3221                                     0,
3222                                     res_size / 4,
3223                                     0,
3224                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3225
3226         i965_add_buffer_gpe_surface(ctx,
3227                                     gpe_context,
3228                                     mbenc_param->pres_mode_decision,
3229                                     0,
3230                                     res_size / 4,
3231                                     0,
3232                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3233
3234         i965_add_buffer_2d_gpe_surface(ctx,
3235                                        gpe_context,
3236                                        mbenc_param->pres_output_16x16_inter_modes,
3237                                        1,
3238                                        I965_SURFACEFORMAT_R8_UNORM,
3239                                        VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3240
3241         res_size = 160;
3242
3243         gen9_add_dri_buffer_gpe_surface(ctx,
3244                                         gpe_context,
3245                                         mbenc_param->gpe_context_tx->curbe.bo,
3246                                         0,
3247                                         ALIGN(res_size, 64),
3248                                         mbenc_param->gpe_context_tx->curbe.offset,
3249                                         VP9_BTI_MBENC_TX_CURBE_G9);
3250
3251
3252         break;
3253     }
3254     case VP9_MEDIA_STATE_MBENC_TX: {
3255         obj_surface = mbenc_param->curr_frame_obj;
3256
3257         i965_add_2d_gpe_surface(ctx,
3258                                 gpe_context,
3259                                 obj_surface,
3260                                 0,
3261                                 1,
3262                                 I965_SURFACEFORMAT_R8_UNORM,
3263                                 VP9_BTI_MBENC_CURR_Y_G9);
3264
3265         i965_add_2d_gpe_surface(ctx,
3266                                 gpe_context,
3267                                 obj_surface,
3268                                 1,
3269                                 1,
3270                                 I965_SURFACEFORMAT_R16_UINT,
3271                                 VP9_BTI_MBENC_CURR_UV_G9);
3272
3273         if (mbenc_param->segmentation_enabled) {
3274             i965_add_buffer_2d_gpe_surface(ctx,
3275                                            gpe_context,
3276                                            mbenc_param->pres_segmentation_map,
3277                                            1,
3278                                            I965_SURFACEFORMAT_R8_UNORM,
3279                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3280
3281         }
3282
3283         res_size = 16 * mbenc_param->frame_width_in_mb *
3284                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3285         i965_add_buffer_gpe_surface(ctx,
3286                                     gpe_context,
3287                                     mbenc_param->pres_mode_decision,
3288                                     0,
3289                                     res_size / 4,
3290                                     0,
3291                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3292
3293         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3294         i965_add_buffer_gpe_surface(ctx,
3295                                     gpe_context,
3296                                     mbenc_param->pres_mb_code_surface,
3297                                     0,
3298                                     res_size / 4,
3299                                     0,
3300                                     VP9_BTI_MBENC_PAK_DATA_G9);
3301
3302         // CU Record
3303         res_size = frame_width_in_sb * frame_height_in_sb *
3304                    64 * 16 * sizeof(unsigned int);
3305
3306         i965_add_buffer_gpe_surface(ctx,
3307                                     gpe_context,
3308                                     mbenc_param->pres_mb_code_surface,
3309                                     0,
3310                                     res_size / 4,
3311                                     mbenc_param->mb_data_offset,
3312                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3313     }
3314     default:
3315         break;
3316     }
3317
3318     return;
3319 }
3320
3321 static VAStatus
3322 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3323                       struct encode_state *encode_state,
3324                       struct intel_encoder_context *encoder_context,
3325                       int media_function)
3326 {
3327     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3328     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3329     struct gpe_media_object_walker_parameter        media_object_walker_param;
3330     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
3331     unsigned int    resolution_x, resolution_y;
3332     struct gen9_vp9_state *vp9_state;
3333     VAEncPictureParameterBufferVP9  *pic_param;
3334     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3335     struct gen9_vp9_mbenc_surface_param             surface_param;
3336     VAStatus    va_status = VA_STATUS_SUCCESS;
3337     int mbenc_gpe_index = 0;
3338     struct object_surface *obj_surface;
3339     struct gen9_surface_vp9 *vp9_priv_surface;
3340
3341     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3342
3343     if (!vp9_state || !vp9_state->pic_param)
3344         return VA_STATUS_ERROR_ENCODING_ERROR;
3345
3346     pic_param = vp9_state->pic_param;
3347
3348     switch (media_function) {
3349     case VP9_MEDIA_STATE_MBENC_I_32x32:
3350         mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3351         break;
3352
3353     case VP9_MEDIA_STATE_MBENC_I_16x16:
3354         mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3355         break;
3356
3357     case VP9_MEDIA_STATE_MBENC_P:
3358         mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3359         break;
3360
3361     case VP9_MEDIA_STATE_MBENC_TX:
3362         mbenc_gpe_index = VP9_MBENC_IDX_TX;
3363         break;
3364
3365     default:
3366         va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3367         return va_status;
3368     }
3369
3370     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3371     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3372
3373     gen9_gpe_reset_binding_table(ctx, gpe_context);
3374
3375     // Set curbe
3376     if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3377         if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3378             media_function == VP9_MEDIA_STATE_MBENC_P) {
3379             memset(&curbe_param, 0, sizeof(curbe_param));
3380             curbe_param.ppic_param            = vp9_state->pic_param;
3381             curbe_param.pseq_param            = vp9_state->seq_param;
3382             curbe_param.psegment_param        = vp9_state->segment_param;
3383             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3384             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3385             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3386             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3387             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3388             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3389             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3390             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3391             curbe_param.media_state_type      = media_function;
3392             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3393
3394             vme_context->pfn_set_curbe_mbenc(ctx,
3395                                              encode_state,
3396                                              gpe_context,
3397                                              encoder_context,
3398                                              &curbe_param);
3399         }
3400     }
3401
3402     memset(&surface_param, 0, sizeof(surface_param));
3403     surface_param.media_state_type             = media_function;
3404     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3405     surface_param.frame_width                  = vp9_state->frame_width;
3406     surface_param.frame_height                 = vp9_state->frame_height;
3407     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3408     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3409     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3410     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3411     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3412     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3413     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3414     surface_param.pres_mode_decision           =
3415         &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3416     surface_param.pres_mode_decision_prev      =
3417         &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3418     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3419     surface_param.pres_mbenc_curbe_buffer      = NULL;
3420     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3421     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3422     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3423     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3424     surface_param.gpe_context_tx               = tx_gpe_context;
3425     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3426
3427     obj_surface = encode_state->reconstructed_object;
3428     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3429     if (vp9_state->dys_in_use &&
3430         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3431          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3432         obj_surface = vp9_priv_surface->dys_surface_obj;
3433     } else
3434         obj_surface = encode_state->input_yuv_object;
3435
3436     surface_param.curr_frame_obj             = obj_surface;
3437
3438     vme_context->pfn_send_mbenc_surface(ctx,
3439                                         encode_state,
3440                                         gpe_context,
3441                                         encoder_context,
3442                                         &surface_param);
3443
3444     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3445         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3446         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3447     } else {
3448         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3449         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3450     }
3451
3452     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3453     kernel_walker_param.resolution_x = resolution_x;
3454     kernel_walker_param.resolution_y = resolution_y;
3455
3456     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3457         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3458         kernel_walker_param.use_scoreboard = 1;
3459         kernel_walker_param.no_dependency = 0;
3460         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3461     } else {
3462         kernel_walker_param.use_scoreboard = 0;
3463         kernel_walker_param.no_dependency = 1;
3464     }
3465
3466     gen8_gpe_setup_interface_data(ctx, gpe_context);
3467
3468     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3469
3470     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3471                                         gpe_context,
3472                                         media_function,
3473                                         &media_object_walker_param);
3474     return va_status;
3475 }
3476
3477 static void
3478 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3479                           struct i965_gpe_context *gpe_context,
3480                           struct vp9_encoder_kernel_parameter *kernel_param)
3481 {
3482     struct i965_driver_data *i965 = i965_driver_data(ctx);
3483
3484     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3485
3486     gpe_context->sampler.entry_size = 0;
3487     gpe_context->sampler.max_entries = 0;
3488
3489     if (kernel_param->sampler_size) {
3490         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3491         gpe_context->sampler.max_entries = 1;
3492     }
3493
3494     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3495     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3496
3497     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3498     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3499     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3500     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3501
3502     if (i965->intel.eu_total > 0)
3503         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3504     else
3505         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3506
3507     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3508     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3509     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3510                                               gpe_context->vfe_state.curbe_allocation_size -
3511                                               ((gpe_context->idrt.entry_size >> 5) *
3512                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3513     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3514     gpe_context->vfe_state.gpgpu_mode = 0;
3515 }
3516
3517 static void
3518 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3519                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3520 {
3521     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3522     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3523     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3524
3525     if (scoreboard_param->walkpat_flag) {
3526         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3527         gpe_context->vfe_desc5.scoreboard0.type = 1;
3528
3529         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
3530         gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
3531
3532         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3533         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
3534
3535         gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
3536         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
3537
3538         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3539         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
3540     } else {
3541         // Scoreboard 0
3542         gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
3543         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
3544
3545         // Scoreboard 1
3546         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3547         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
3548
3549         // Scoreboard 2
3550         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
3551         gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
3552
3553         // Scoreboard 3
3554         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3555         gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
3556
3557         // Scoreboard 4
3558         gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
3559         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
3560
3561         // Scoreboard 5
3562         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
3563         gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
3564
3565         // Scoreboard 6
3566         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
3567         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3568
3569         // Scoreboard 7
3570         gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
3571         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3572     }
3573 }
3574
3575 #define VP9_VME_REF_WIN       48
3576
3577 static VAStatus
3578 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3579                                 struct encode_state *encode_state,
3580                                 struct intel_encoder_context *encoder_context)
3581 {
3582     struct i965_driver_data *i965 = i965_driver_data(ctx);
3583     struct gen9_vp9_state *vp9_state;
3584     VAEncPictureParameterBufferVP9  *pic_param;
3585     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3586     VAEncSequenceParameterBufferVP9 *seq_param;
3587     struct object_surface *obj_surface;
3588     struct object_buffer *obj_buffer;
3589     struct gen9_surface_vp9 *vp9_priv_surface;
3590
3591     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3592
3593     if (!encode_state->pic_param_ext ||
3594         !encode_state->pic_param_ext->buffer) {
3595         return VA_STATUS_ERROR_INVALID_PARAMETER;
3596     }
3597     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3598
3599     obj_buffer = BUFFER(pic_param->coded_buf);
3600
3601     if (!obj_buffer ||
3602         !obj_buffer->buffer_store ||
3603         !obj_buffer->buffer_store->bo)
3604         return VA_STATUS_ERROR_INVALID_PARAMETER;
3605
3606     encode_state->coded_buf_object = obj_buffer;
3607
3608     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3609
3610     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3611
3612     if (!encode_state->reconstructed_object ||
3613         !encode_state->input_yuv_object)
3614         return VA_STATUS_ERROR_INVALID_PARAMETER;
3615
3616     vp9_state->curr_frame = pic_param->reconstructed_frame;
3617     vp9_state->ref_frame_flag = 0;
3618     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3619         pic_param->pic_flags.bits.intra_only) {
3620         /* this will be regarded as I-frame type */
3621         vp9_state->picture_coding_type = 0;
3622         vp9_state->last_ref_obj = NULL;
3623         vp9_state->golden_ref_obj = NULL;
3624         vp9_state->alt_ref_obj = NULL;
3625     } else {
3626         vp9_state->picture_coding_type = 1;
3627         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3628                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3629
3630         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3631         vp9_state->last_ref_obj = obj_surface;
3632         if (!obj_surface ||
3633             !obj_surface->bo ||
3634             !obj_surface->private_data) {
3635             vp9_state->last_ref_obj = NULL;
3636             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3637         }
3638
3639         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3640         vp9_state->golden_ref_obj = obj_surface;
3641         if (!obj_surface ||
3642             !obj_surface->bo ||
3643             !obj_surface->private_data) {
3644             vp9_state->golden_ref_obj = NULL;
3645             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3646         }
3647
3648         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3649         vp9_state->alt_ref_obj = obj_surface;
3650         if (!obj_surface ||
3651             !obj_surface->bo ||
3652             !obj_surface->private_data) {
3653             vp9_state->alt_ref_obj = NULL;
3654             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3655         }
3656
3657         /* remove the duplicated flag and ref frame list */
3658         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3659             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3660                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3661                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3662                 vp9_state->golden_ref_obj = NULL;
3663             }
3664
3665             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3666                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3667                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3668                 vp9_state->alt_ref_obj = NULL;
3669             }
3670         }
3671
3672         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3673             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3674                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3675                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3676                 vp9_state->alt_ref_obj = NULL;
3677             }
3678         }
3679
3680         if (vp9_state->ref_frame_flag == 0)
3681             return VA_STATUS_ERROR_INVALID_PARAMETER;
3682     }
3683
3684     seg_param = NULL;
3685     if (pic_param->pic_flags.bits.segmentation_enabled) {
3686         if (!encode_state->q_matrix ||
3687             !encode_state->q_matrix->buffer) {
3688             return VA_STATUS_ERROR_INVALID_PARAMETER;
3689         }
3690         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3691                     encode_state->q_matrix->buffer;
3692     }
3693
3694     seq_param = NULL;
3695     if (encode_state->seq_param_ext &&
3696         encode_state->seq_param_ext->buffer)
3697         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3698
3699     if (!seq_param) {
3700         seq_param = &vp9_state->bogus_seq_param;
3701     }
3702
3703     vp9_state->pic_param = pic_param;
3704     vp9_state->segment_param = seg_param;
3705     vp9_state->seq_param = seq_param;
3706
3707     obj_surface = encode_state->reconstructed_object;
3708     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3709         pic_param->frame_height_dst > obj_surface->orig_height)
3710         return VA_STATUS_ERROR_INVALID_SURFACE;
3711
3712     if (!vp9_state->dys_enabled &&
3713         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3714          (pic_param->frame_height_src != pic_param->frame_height_dst)))
3715         return VA_STATUS_ERROR_UNIMPLEMENTED;
3716
3717     if (vp9_state->brc_enabled) {
3718         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3719             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3720
3721             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3722                 !encoder_context->brc.bits_per_second[0])
3723                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3724
3725             vp9_state->gop_size = encoder_context->brc.gop_size;
3726             vp9_state->framerate = encoder_context->brc.framerate[0];
3727             if ((vp9_state->framerate.num / vp9_state->framerate.den) > MAX_VP9_ENCODER_FRAMERATE) {
3728                 vp9_state->framerate.num = MAX_VP9_ENCODER_FRAMERATE * vp9_state->framerate.den;
3729                 i965_log_info(ctx, "gen9_encode_vp9_check_parameter: Too high frame rate(num: %d, den: %d), max supported is %d fps.\n",
3730                               vp9_state->framerate.num, vp9_state->framerate.den, MAX_VP9_ENCODER_FRAMERATE);
3731             }
3732
3733             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3734                 !encoder_context->brc.target_percentage[0]) {
3735                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3736                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3737                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3738             } else {
3739                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3740                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3741                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3742                     vp9_state->min_bit_rate = 0;
3743                 else
3744                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3745             }
3746
3747             if (encoder_context->brc.hrd_buffer_size)
3748                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3749             else if (encoder_context->brc.window_size)
3750                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3751             else
3752                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3753             if (encoder_context->brc.hrd_initial_buffer_fullness)
3754                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3755             else
3756                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3757         }
3758     }
3759
3760     vp9_state->frame_width = pic_param->frame_width_dst;
3761     vp9_state->frame_height = pic_param->frame_height_dst;
3762
3763     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3764     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3765
3766     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3767     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3768
3769     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3770     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3771
3772     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3773     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3774     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3775     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3776
3777     vp9_state->dys_in_use = 0;
3778     if (pic_param->frame_width_src != pic_param->frame_width_dst ||
3779         pic_param->frame_height_src != pic_param->frame_height_dst)
3780         vp9_state->dys_in_use = 1;
3781     vp9_state->dys_ref_frame_flag = 0;
3782     /* check the dys setting. The dys is supported by default. */
3783     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3784         !pic_param->pic_flags.bits.intra_only) {
3785         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3786
3787         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3788             vp9_state->last_ref_obj) {
3789             obj_surface = vp9_state->last_ref_obj;
3790             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3791
3792             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3793                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3794                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3795         }
3796         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3797             vp9_state->golden_ref_obj) {
3798             obj_surface = vp9_state->golden_ref_obj;
3799             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3800
3801             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3802                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3803                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3804         }
3805         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3806             vp9_state->alt_ref_obj) {
3807             obj_surface = vp9_state->alt_ref_obj;
3808             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3809
3810             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3811                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3812                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3813         }
3814         if (vp9_state->dys_ref_frame_flag)
3815             vp9_state->dys_in_use = 1;
3816     }
3817
3818     if (vp9_state->hme_supported) {
3819         vp9_state->hme_enabled = 1;
3820     } else {
3821         vp9_state->hme_enabled = 0;
3822     }
3823
3824     if (vp9_state->b16xme_supported) {
3825         vp9_state->b16xme_enabled = 1;
3826     } else {
3827         vp9_state->b16xme_enabled = 0;
3828     }
3829
3830     /* disable HME/16xME if the size is too small */
3831     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3832         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3833         vp9_state->hme_enabled = 0;
3834         vp9_state->b16xme_enabled = 0;
3835     }
3836
3837     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3838         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3839         vp9_state->b16xme_enabled = 0;
3840
3841     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3842         pic_param->pic_flags.bits.intra_only) {
3843         vp9_state->hme_enabled = 0;
3844         vp9_state->b16xme_enabled = 0;
3845     }
3846
3847     vp9_state->mbenc_keyframe_dist_enabled = 0;
3848     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3849         vp9_state->brc_distortion_buffer_supported)
3850         vp9_state->mbenc_keyframe_dist_enabled = 1;
3851
3852     return VA_STATUS_SUCCESS;
3853 }
3854
3855 static VAStatus
3856 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3857                                 struct encode_state *encode_state,
3858                                 struct intel_encoder_context *encoder_context)
3859 {
3860     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3861     struct vp9_surface_param surface_param;
3862     struct gen9_vp9_state *vp9_state;
3863     VAEncPictureParameterBufferVP9  *pic_param;
3864     struct object_surface *obj_surface;
3865     struct gen9_surface_vp9 *vp9_surface;
3866     int driver_header_flag = 0;
3867     VAStatus va_status;
3868
3869     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3870
3871     if (!vp9_state || !vp9_state->pic_param)
3872         return VA_STATUS_ERROR_INVALID_PARAMETER;
3873
3874     pic_param = vp9_state->pic_param;
3875
3876     /* this is to check whether the driver should generate the uncompressed header */
3877     driver_header_flag = 1;
3878     if (encode_state->packed_header_data_ext &&
3879         encode_state->packed_header_data_ext[0] &&
3880         pic_param->bit_offset_first_partition_size) {
3881         VAEncPackedHeaderParameterBuffer *param = NULL;
3882
3883         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3884
3885         if (param->type == VAEncPackedHeaderRawData) {
3886             char *header_data;
3887             unsigned int length_in_bits;
3888
3889             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3890             length_in_bits = param->bit_length;
3891             driver_header_flag = 0;
3892
3893             vp9_state->frame_header.bit_offset_first_partition_size =
3894                 pic_param->bit_offset_first_partition_size;
3895             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3896             vp9_state->alias_insert_data = header_data;
3897
3898             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3899             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3900             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3901             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3902             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3903             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3904         }
3905     }
3906
3907     if (driver_header_flag) {
3908         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3909         intel_write_uncompressed_header(encode_state,
3910                                         VAProfileVP9Profile0,
3911                                         vme_context->frame_header_data,
3912                                         &vp9_state->header_length,
3913                                         &vp9_state->frame_header);
3914         vp9_state->alias_insert_data = vme_context->frame_header_data;
3915     }
3916
3917     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3918                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3919     if (va_status != VA_STATUS_SUCCESS)
3920         return va_status;
3921
3922     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3923                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3924
3925     if (va_status != VA_STATUS_SUCCESS)
3926         return va_status;
3927
3928     surface_param.frame_width = vp9_state->frame_width;
3929     surface_param.frame_height = vp9_state->frame_height;
3930     va_status = gen9_vp9_init_check_surfaces(ctx,
3931                                              encode_state->reconstructed_object,
3932                                              &surface_param);
3933
3934     {
3935         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3936
3937         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3938     }
3939     if (vp9_state->dys_in_use &&
3940         (pic_param->frame_width_src != pic_param->frame_width_dst ||
3941          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3942         surface_param.frame_width = pic_param->frame_width_dst;
3943         surface_param.frame_height = pic_param->frame_height_dst;
3944         va_status = gen9_vp9_check_dys_surfaces(ctx,
3945                                                 encode_state->reconstructed_object,
3946                                                 &surface_param);
3947
3948         if (va_status)
3949             return va_status;
3950     }
3951
3952     if (vp9_state->dys_ref_frame_flag) {
3953         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3954             vp9_state->last_ref_obj) {
3955             obj_surface = vp9_state->last_ref_obj;
3956             surface_param.frame_width = vp9_state->frame_width;
3957             surface_param.frame_height = vp9_state->frame_height;
3958             va_status = gen9_vp9_check_dys_surfaces(ctx,
3959                                                     obj_surface,
3960                                                     &surface_param);
3961
3962             if (va_status)
3963                 return va_status;
3964         }
3965         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3966             vp9_state->golden_ref_obj) {
3967             obj_surface = vp9_state->golden_ref_obj;
3968             surface_param.frame_width = vp9_state->frame_width;
3969             surface_param.frame_height = vp9_state->frame_height;
3970             va_status = gen9_vp9_check_dys_surfaces(ctx,
3971                                                     obj_surface,
3972                                                     &surface_param);
3973
3974             if (va_status)
3975                 return va_status;
3976         }
3977         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3978             vp9_state->alt_ref_obj) {
3979             obj_surface = vp9_state->alt_ref_obj;
3980             surface_param.frame_width = vp9_state->frame_width;
3981             surface_param.frame_height = vp9_state->frame_height;
3982             va_status = gen9_vp9_check_dys_surfaces(ctx,
3983                                                     obj_surface,
3984                                                     &surface_param);
3985
3986             if (va_status)
3987                 return va_status;
3988         }
3989     }
3990
3991     if (va_status != VA_STATUS_SUCCESS)
3992         return va_status;
3993     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
3994
3995     return VA_STATUS_SUCCESS;
3996 }
3997
3998 static VAStatus
3999 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4000                              struct encode_state *encode_state,
4001                              struct intel_encoder_context *encoder_context)
4002 {
4003     struct i965_driver_data *i965 = i965_driver_data(ctx);
4004     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4005     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4006     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4007     struct gpe_dynamic_state_parameter ds_param;
4008     int i;
4009
4010     /*
4011      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4012      * MBEnc first
4013      */
4014     for (i = 0; i < NUM_VP9_MBENC; i++) {
4015         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4016     }
4017
4018     /*
4019      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4020      * curbe_buffer.
4021      */
4022     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4023                        ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4024     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4025                                                "mbenc_dys",
4026                                                ds_param.bo_size,
4027                                                0x1000);
4028     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4029
4030     ds_param.bo = mbenc_context->mbenc_bo_dys;
4031     ds_param.curbe_offset = 0;
4032     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4033     for (i = 0; i < NUM_VP9_MBENC; i++) {
4034         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4035                                ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4036
4037         gen8_gpe_context_set_dynamic_buffer(ctx,
4038                                             &mbenc_context->gpe_contexts[i],
4039                                             &ds_param);
4040     }
4041
4042     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4043     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4044
4045     return VA_STATUS_SUCCESS;
4046 }
4047
4048 static VAStatus
4049 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4050                               struct encode_state *encode_state,
4051                               struct intel_encoder_context *encoder_context)
4052 {
4053     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4054     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4055
4056     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4057     mbenc_context->mbenc_bo_dys = NULL;
4058
4059     return VA_STATUS_SUCCESS;
4060 }
4061
4062 static VAStatus
4063 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4064                             struct encode_state *encode_state,
4065                             struct intel_encoder_context *encoder_context)
4066 {
4067     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4068     struct gen9_vp9_state *vp9_state;
4069     int i;
4070
4071     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4072
4073     if (!vp9_state || !vp9_state->pic_param)
4074         return VA_STATUS_ERROR_INVALID_PARAMETER;
4075
4076     if (vp9_state->dys_in_use) {
4077         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4078     }
4079
4080     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4081         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4082     }
4083
4084     if (vp9_state->picture_coding_type == KEY_FRAME) {
4085         for (i = 0; i < 2; i++)
4086             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4087     }
4088
4089     if (vp9_state->hme_supported) {
4090         gen9_vp9_scaling_kernel(ctx, encode_state,
4091                                 encoder_context,
4092                                 0);
4093         if (vp9_state->b16xme_supported) {
4094             gen9_vp9_scaling_kernel(ctx, encode_state,
4095                                     encoder_context,
4096                                     1);
4097         }
4098     }
4099
4100     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4101         if (vp9_state->b16xme_enabled)
4102             gen9_vp9_me_kernel(ctx, encode_state,
4103                                encoder_context,
4104                                1);
4105
4106         gen9_vp9_me_kernel(ctx, encode_state,
4107                            encoder_context,
4108                            0);
4109     }
4110
4111     if (vp9_state->brc_enabled) {
4112         if (vp9_state->mbenc_keyframe_dist_enabled)
4113             gen9_vp9_brc_intra_dist_kernel(ctx,
4114                                            encode_state,
4115                                            encoder_context);
4116
4117         gen9_vp9_brc_update_kernel(ctx, encode_state,
4118                                    encoder_context);
4119     }
4120
4121     if (vp9_state->picture_coding_type == KEY_FRAME) {
4122         gen9_vp9_mbenc_kernel(ctx, encode_state,
4123                               encoder_context,
4124                               VP9_MEDIA_STATE_MBENC_I_32x32);
4125         gen9_vp9_mbenc_kernel(ctx, encode_state,
4126                               encoder_context,
4127                               VP9_MEDIA_STATE_MBENC_I_16x16);
4128     } else {
4129         gen9_vp9_mbenc_kernel(ctx, encode_state,
4130                               encoder_context,
4131                               VP9_MEDIA_STATE_MBENC_P);
4132     }
4133
4134     gen9_vp9_mbenc_kernel(ctx, encode_state,
4135                           encoder_context,
4136                           VP9_MEDIA_STATE_MBENC_TX);
4137
4138     vp9_state->curr_mode_decision_index ^= 1;
4139     if (vp9_state->brc_enabled) {
4140         vp9_state->brc_inited = 1;
4141         vp9_state->brc_reset = 0;
4142     }
4143
4144     return VA_STATUS_SUCCESS;
4145 }
4146
4147 static VAStatus
4148 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4149                       VAProfile profile,
4150                       struct encode_state *encode_state,
4151                       struct intel_encoder_context *encoder_context)
4152 {
4153     VAStatus va_status;
4154     struct gen9_vp9_state *vp9_state;
4155
4156     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4157
4158     if (!vp9_state)
4159         return VA_STATUS_ERROR_INVALID_CONTEXT;
4160
4161     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4162     if (va_status != VA_STATUS_SUCCESS)
4163         return va_status;
4164
4165     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4166                                             encoder_context,
4167                                             !vp9_state->brc_allocated);
4168
4169     if (va_status != VA_STATUS_SUCCESS)
4170         return va_status;
4171     vp9_state->brc_allocated = 1;
4172
4173     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4174
4175     if (va_status != VA_STATUS_SUCCESS)
4176         return va_status;
4177
4178     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4179     if (va_status != VA_STATUS_SUCCESS)
4180         return va_status;
4181
4182     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4183     if (va_status != VA_STATUS_SUCCESS)
4184         return va_status;
4185
4186     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4187
4188     return VA_STATUS_SUCCESS;
4189 }
4190
4191 static void
4192 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4193 {
4194     int i;
4195
4196     for (i = 0; i < NUM_VP9_BRC; i++)
4197         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4198 }
4199
4200 static void
4201 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4202 {
4203     int i;
4204
4205     for (i = 0; i < NUM_VP9_SCALING; i++)
4206         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4207 }
4208
4209 static void
4210 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4211 {
4212     gen8_gpe_context_destroy(&me_context->gpe_context);
4213 }
4214
4215 static void
4216 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4217 {
4218     int i;
4219
4220     for (i = 0; i < NUM_VP9_MBENC; i++)
4221         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4222     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4223     mbenc_context->mbenc_bo_size = 0;
4224 }
4225
4226 static void
4227 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4228 {
4229     gen8_gpe_context_destroy(&dys_context->gpe_context);
4230 }
4231
4232 static void
4233 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4234 {
4235     gen9_vp9_free_resources(vme_context);
4236     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4237     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4238     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4239     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4240     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4241
4242     return;
4243 }
4244
/*
 * Destroy callback for the VP9 encoder context: tears down the kernel
 * contexts and resources, then frees the context itself. A NULL context is
 * ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vp9_context = context;

    if (vp9_context) {
        gen9_vme_kernel_context_destroy_vp9(vp9_context);
        free(vp9_context);
    }
}
4259
4260 static void
4261 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4262                                   struct gen9_encoder_context_vp9 *vme_context,
4263                                   struct vp9_scaling_context *scaling_context)
4264 {
4265     struct i965_gpe_context *gpe_context = NULL;
4266     struct vp9_encoder_kernel_parameter kernel_param;
4267     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4268     struct i965_kernel scale_kernel;
4269
4270     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4271     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4272     kernel_param.sampler_size = 0;
4273
4274     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4275     scoreboard_param.mask = 0xFF;
4276     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4277     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4278     scoreboard_param.walkpat_flag = 0;
4279
4280     gpe_context = &scaling_context->gpe_contexts[0];
4281     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4282     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4283
4284     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4285     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4286     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4287         VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4288
4289     memset(&scale_kernel, 0, sizeof(scale_kernel));
4290
4291     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4292                                          sizeof(media_vp9_kernels),
4293                                          INTEL_VP9_ENC_SCALING4X,
4294                                          0,
4295                                          &scale_kernel);
4296
4297     gen8_gpe_load_kernels(ctx,
4298                           gpe_context,
4299                           &scale_kernel,
4300                           1);
4301
4302     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4303     kernel_param.inline_data_size = 0;
4304     kernel_param.sampler_size = 0;
4305
4306     gpe_context = &scaling_context->gpe_contexts[1];
4307     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4308     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4309
4310     memset(&scale_kernel, 0, sizeof(scale_kernel));
4311
4312     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4313                                          sizeof(media_vp9_kernels),
4314                                          INTEL_VP9_ENC_SCALING2X,
4315                                          0,
4316                                          &scale_kernel);
4317
4318     gen8_gpe_load_kernels(ctx,
4319                           gpe_context,
4320                           &scale_kernel,
4321                           1);
4322
4323     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4324     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4325     return;
4326 }
4327
4328 static void
4329 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4330                              struct gen9_encoder_context_vp9 *vme_context,
4331                              struct vp9_me_context *me_context)
4332 {
4333     struct i965_gpe_context *gpe_context = NULL;
4334     struct vp9_encoder_kernel_parameter kernel_param;
4335     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4336     struct i965_kernel scale_kernel;
4337
4338     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4339     kernel_param.inline_data_size = 0;
4340     kernel_param.sampler_size = 0;
4341
4342     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4343     scoreboard_param.mask = 0xFF;
4344     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4345     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4346     scoreboard_param.walkpat_flag = 0;
4347
4348     gpe_context = &me_context->gpe_context;
4349     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4350     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4351
4352     memset(&scale_kernel, 0, sizeof(scale_kernel));
4353
4354     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4355                                          sizeof(media_vp9_kernels),
4356                                          INTEL_VP9_ENC_ME,
4357                                          0,
4358                                          &scale_kernel);
4359
4360     gen8_gpe_load_kernels(ctx,
4361                           gpe_context,
4362                           &scale_kernel,
4363                           1);
4364
4365     return;
4366 }
4367
4368 static void
4369 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4370                                 struct gen9_encoder_context_vp9 *vme_context,
4371                                 struct vp9_mbenc_context *mbenc_context)
4372 {
4373     struct i965_gpe_context *gpe_context = NULL;
4374     struct vp9_encoder_kernel_parameter kernel_param;
4375     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4376     int i;
4377     struct i965_kernel scale_kernel;
4378
4379     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4380     kernel_param.inline_data_size = 0;
4381     kernel_param.sampler_size = 0;
4382
4383     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4384     scoreboard_param.mask = 0xFF;
4385     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4386     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4387
4388     for (i = 0; i < NUM_VP9_MBENC; i++) {
4389         gpe_context = &mbenc_context->gpe_contexts[i];
4390
4391         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4392             (i == VP9_MBENC_IDX_INTER)) {
4393             scoreboard_param.walkpat_flag = 1;
4394         } else
4395             scoreboard_param.walkpat_flag = 0;
4396
4397         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4398         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4399
4400         memset(&scale_kernel, 0, sizeof(scale_kernel));
4401
4402         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4403                                              sizeof(media_vp9_kernels),
4404                                              INTEL_VP9_ENC_MBENC,
4405                                              i,
4406                                              &scale_kernel);
4407
4408         gen8_gpe_load_kernels(ctx,
4409                               gpe_context,
4410                               &scale_kernel,
4411                               1);
4412     }
4413 }
4414
4415 static void
4416 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4417                               struct gen9_encoder_context_vp9 *vme_context,
4418                               struct vp9_brc_context *brc_context)
4419 {
4420     struct i965_gpe_context *gpe_context = NULL;
4421     struct vp9_encoder_kernel_parameter kernel_param;
4422     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4423     int i;
4424     struct i965_kernel scale_kernel;
4425
4426     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4427     kernel_param.inline_data_size = 0;
4428     kernel_param.sampler_size = 0;
4429
4430     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4431     scoreboard_param.mask = 0xFF;
4432     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4433     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4434
4435     for (i = 0; i < NUM_VP9_BRC; i++) {
4436         gpe_context = &brc_context->gpe_contexts[i];
4437         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4438         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4439
4440         memset(&scale_kernel, 0, sizeof(scale_kernel));
4441
4442         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4443                                              sizeof(media_vp9_kernels),
4444                                              INTEL_VP9_ENC_BRC,
4445                                              i,
4446                                              &scale_kernel);
4447
4448         gen8_gpe_load_kernels(ctx,
4449                               gpe_context,
4450                               &scale_kernel,
4451                               1);
4452     }
4453 }
4454
4455 static void
4456 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4457                               struct gen9_encoder_context_vp9 *vme_context,
4458                               struct vp9_dys_context *dys_context)
4459 {
4460     struct i965_gpe_context *gpe_context = NULL;
4461     struct vp9_encoder_kernel_parameter kernel_param;
4462     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4463     struct i965_kernel scale_kernel;
4464
4465     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4466     kernel_param.inline_data_size = 0;
4467     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4468
4469     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4470     scoreboard_param.mask = 0xFF;
4471     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4472     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4473     scoreboard_param.walkpat_flag = 0;
4474
4475     gpe_context = &dys_context->gpe_context;
4476     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4477     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4478
4479     memset(&scale_kernel, 0, sizeof(scale_kernel));
4480
4481     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4482                                          sizeof(media_vp9_kernels),
4483                                          INTEL_VP9_ENC_DYS,
4484                                          0,
4485                                          &scale_kernel);
4486
4487     gen8_gpe_load_kernels(ctx,
4488                           gpe_context,
4489                           &scale_kernel,
4490                           1);
4491
4492     return;
4493 }
4494
4495 static Bool
4496 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4497                                   struct intel_encoder_context *encoder_context,
4498                                   struct gen9_encoder_context_vp9 *vme_context)
4499 {
4500     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4501     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4502     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4503     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4504     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4505
4506     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4507     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4508     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4509     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4510
4511     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4512
4513     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4514     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4515     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4516     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4517     return true;
4518 }
4519
4520 static
4521 void gen9_vp9_write_compressed_element(char *buffer,
4522                                        int index,
4523                                        int prob,
4524                                        bool value)
4525 {
4526     struct vp9_compressed_element *base_element, *vp9_element;
4527     base_element = (struct vp9_compressed_element *)buffer;
4528
4529     vp9_element = base_element + (index >> 1);
4530     if (index % 2) {
4531         vp9_element->b_valid = 1;
4532         vp9_element->b_probdiff_select = 1;
4533         vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4534         vp9_element->b_bin = value;
4535     } else {
4536         vp9_element->a_valid = 1;
4537         vp9_element->a_probdiff_select = 1;
4538         vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4539         vp9_element->a_bin = value;
4540     }
4541 }
4542
4543 static void
4544 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4545                                             struct intel_encoder_context *encoder_context)
4546 {
4547     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4548     VAEncPictureParameterBufferVP9 *pic_param;
4549     struct gen9_vp9_state *vp9_state;
4550     char *buffer;
4551     int i;
4552
4553     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4554
4555     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4556         return;
4557
4558     pic_param = vp9_state->pic_param;
4559     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4560         (pic_param->pic_flags.bits.intra_only) ||
4561         pic_param->pic_flags.bits.error_resilient_mode) {
4562         /* reset current frame_context */
4563         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4564         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4565             pic_param->pic_flags.bits.error_resilient_mode ||
4566             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4567             for (i = 0; i < 4; i++)
4568                 memcpy(&vp9_state->vp9_frame_ctx[i],
4569                        &vp9_state->vp9_current_fc,
4570                        sizeof(FRAME_CONTEXT));
4571         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4572             i = pic_param->pic_flags.bits.frame_context_idx;
4573             memcpy(&vp9_state->vp9_frame_ctx[i],
4574                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4575         }
4576         /* reset the frame_ctx_idx = 0 */
4577         vp9_state->frame_ctx_idx = 0;
4578     } else {
4579         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4580     }
4581
4582     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4583     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4584
4585     if (!buffer)
4586         return;
4587
4588     /* write tx_size */
4589     if ((pic_param->luma_ac_qindex == 0) &&
4590         (pic_param->luma_dc_qindex_delta == 0) &&
4591         (pic_param->chroma_ac_qindex_delta == 0) &&
4592         (pic_param->chroma_dc_qindex_delta == 0)) {
4593         /* lossless flag */
4594         /* nothing is needed */
4595         gen9_vp9_write_compressed_element(buffer,
4596                                           0, 128, 0);
4597         gen9_vp9_write_compressed_element(buffer,
4598                                           1, 128, 0);
4599         gen9_vp9_write_compressed_element(buffer,
4600                                           2, 128, 0);
4601     } else {
4602         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4603             gen9_vp9_write_compressed_element(buffer,
4604                                               0, 128, 1);
4605             gen9_vp9_write_compressed_element(buffer,
4606                                               1, 128, 1);
4607             gen9_vp9_write_compressed_element(buffer,
4608                                               2, 128, 1);
4609         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4610             gen9_vp9_write_compressed_element(buffer,
4611                                               0, 128, 1);
4612             gen9_vp9_write_compressed_element(buffer,
4613                                               1, 128, 1);
4614             gen9_vp9_write_compressed_element(buffer,
4615                                               2, 128, 0);
4616         } else {
4617             unsigned int tx_mode;
4618
4619             tx_mode = vp9_state->tx_mode;
4620             gen9_vp9_write_compressed_element(buffer,
4621                                               0, 128, ((tx_mode) & 2));
4622             gen9_vp9_write_compressed_element(buffer,
4623                                               1, 128, ((tx_mode) & 1));
4624             gen9_vp9_write_compressed_element(buffer,
4625                                               2, 128, 0);
4626         }
4627
4628         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4629
4630             gen9_vp9_write_compressed_element(buffer,
4631                                               3, 128, 0);
4632
4633             gen9_vp9_write_compressed_element(buffer,
4634                                               7, 128, 0);
4635
4636             gen9_vp9_write_compressed_element(buffer,
4637                                               15, 128, 0);
4638         }
4639     }
4640     /*Setup all the input&output object*/
4641
4642     {
4643         /* update the coeff_update flag */
4644         gen9_vp9_write_compressed_element(buffer,
4645                                           27, 128, 0);
4646         gen9_vp9_write_compressed_element(buffer,
4647                                           820, 128, 0);
4648         gen9_vp9_write_compressed_element(buffer,
4649                                           1613, 128, 0);
4650         gen9_vp9_write_compressed_element(buffer,
4651                                           2406, 128, 0);
4652     }
4653
4654
4655     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4656         bool allow_comp = !(
4657                               (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4658                               (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4659                           );
4660
4661         if (allow_comp) {
4662             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4663                 gen9_vp9_write_compressed_element(buffer,
4664                                                   3271, 128, 1);
4665                 gen9_vp9_write_compressed_element(buffer,
4666                                                   3272, 128, 1);
4667             } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4668                 gen9_vp9_write_compressed_element(buffer,
4669                                                   3271, 128, 1);
4670                 gen9_vp9_write_compressed_element(buffer,
4671                                                   3272, 128, 0);
4672             } else {
4673
4674                 gen9_vp9_write_compressed_element(buffer,
4675                                                   3271, 128, 0);
4676                 gen9_vp9_write_compressed_element(buffer,
4677                                                   3272, 128, 0);
4678             }
4679         }
4680     }
4681
4682     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4683 }
4684
4685
4686 static void
4687 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4688                               struct encode_state *encode_state,
4689                               struct intel_encoder_context *encoder_context,
4690                               struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4691 {
4692     struct intel_batchbuffer *batch = encoder_context->base.batch;
4693
4694     BEGIN_BCS_BATCH(batch, 6);
4695
4696     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4697     OUT_BCS_BATCH(batch,
4698                   (pipe_mode_param->stream_out << 12) |
4699                   (pipe_mode_param->codec_mode << 5) |
4700                   (0 << 3) | /* disable Pic Status / Error Report */
4701                   (pipe_mode_param->stream_out << 2) |
4702                   HCP_CODEC_SELECT_ENCODE);
4703     OUT_BCS_BATCH(batch, 0);
4704     OUT_BCS_BATCH(batch, 0);
4705     OUT_BCS_BATCH(batch, (1 << 6));
4706     OUT_BCS_BATCH(batch, 0);
4707
4708     ADVANCE_BCS_BATCH(batch);
4709 }
4710
4711 static void
4712 gen9_vp9_add_surface_state(VADriverContextP ctx,
4713                            struct encode_state *encode_state,
4714                            struct intel_encoder_context *encoder_context,
4715                            hcp_surface_state *hcp_state)
4716 {
4717     struct intel_batchbuffer *batch = encoder_context->base.batch;
4718     if (!hcp_state)
4719         return;
4720
4721     BEGIN_BCS_BATCH(batch, 3);
4722     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4723     OUT_BCS_BATCH(batch,
4724                   (hcp_state->dw1.surface_id << 28) |
4725                   (hcp_state->dw1.surface_pitch - 1)
4726                  );
4727     OUT_BCS_BATCH(batch,
4728                   (hcp_state->dw2.surface_format << 28) |
4729                   (hcp_state->dw2.y_cb_offset)
4730                  );
4731     ADVANCE_BCS_BATCH(batch);
4732 }
4733
4734 static void
4735 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4736                                  struct encode_state *encode_state,
4737                                  struct intel_encoder_context *encoder_context)
4738 {
4739     struct i965_driver_data *i965 = i965_driver_data(ctx);
4740     struct intel_batchbuffer *batch = encoder_context->base.batch;
4741     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4742     struct gen9_vp9_state *vp9_state;
4743     unsigned int i;
4744     struct object_surface *obj_surface;
4745
4746     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4747
4748     if (!vp9_state || !vp9_state->pic_param)
4749         return;
4750
4751
4752     BEGIN_BCS_BATCH(batch, 104);
4753
4754     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4755
4756     obj_surface = encode_state->reconstructed_object;
4757
4758     /* reconstructed obj_surface is already checked. So this is skipped */
4759     /* DW 1..3 decoded surface */
4760     OUT_RELOC64(batch,
4761                 obj_surface->bo,
4762                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4763                 0);
4764     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4765
4766     /* DW 4..6 deblocking line */
4767     OUT_RELOC64(batch,
4768                 pak_context->res_deblocking_filter_line_buffer.bo,
4769                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4770                 0);
4771     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4772
4773     /* DW 7..9 deblocking tile line */
4774     OUT_RELOC64(batch,
4775                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4776                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4777                 0);
4778     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4779
4780     /* DW 10..12 deblocking tile col */
4781     OUT_RELOC64(batch,
4782                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4783                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4784                 0);
4785     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4786
4787     /* DW 13..15 metadata line */
4788     OUT_RELOC64(batch,
4789                 pak_context->res_metadata_line_buffer.bo,
4790                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4791                 0);
4792     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4793
4794     /* DW 16..18 metadata tile line */
4795     OUT_RELOC64(batch,
4796                 pak_context->res_metadata_tile_line_buffer.bo,
4797                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4798                 0);
4799     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4800
4801     /* DW 19..21 metadata tile col */
4802     OUT_RELOC64(batch,
4803                 pak_context->res_metadata_tile_col_buffer.bo,
4804                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4805                 0);
4806     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4807
4808     /* DW 22..30 SAO is not used for VP9 */
4809     OUT_BCS_BATCH(batch, 0);
4810     OUT_BCS_BATCH(batch, 0);
4811     OUT_BCS_BATCH(batch, 0);
4812     OUT_BCS_BATCH(batch, 0);
4813     OUT_BCS_BATCH(batch, 0);
4814     OUT_BCS_BATCH(batch, 0);
4815     OUT_BCS_BATCH(batch, 0);
4816     OUT_BCS_BATCH(batch, 0);
4817     OUT_BCS_BATCH(batch, 0);
4818
4819     /* DW 31..33 Current Motion vector temporal buffer */
4820     OUT_RELOC64(batch,
4821                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4822                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4823                 0);
4824     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4825
4826     /* DW 34..36 Not used */
4827     OUT_BCS_BATCH(batch, 0);
4828     OUT_BCS_BATCH(batch, 0);
4829     OUT_BCS_BATCH(batch, 0);
4830
4831     /* Only the first three reference_frame is used for VP9 */
4832     /* DW 37..52 for reference_frame */
4833     i = 0;
4834     if (vp9_state->picture_coding_type) {
4835         for (i = 0; i < 3; i++) {
4836
4837             if (pak_context->reference_surfaces[i].bo) {
4838                 OUT_RELOC64(batch,
4839                             pak_context->reference_surfaces[i].bo,
4840                             I915_GEM_DOMAIN_INSTRUCTION, 0,
4841                             0);
4842             } else {
4843                 OUT_BCS_BATCH(batch, 0);
4844                 OUT_BCS_BATCH(batch, 0);
4845             }
4846         }
4847     }
4848
4849     for (; i < 8; i++) {
4850         OUT_BCS_BATCH(batch, 0);
4851         OUT_BCS_BATCH(batch, 0);
4852     }
4853
4854     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4855
4856     /* DW 54..56 for source input */
4857     OUT_RELOC64(batch,
4858                 pak_context->uncompressed_picture_source.bo,
4859                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4860                 0);
4861     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4862
4863     /* DW 57..59 StreamOut is not used */
4864     OUT_BCS_BATCH(batch, 0);
4865     OUT_BCS_BATCH(batch, 0);
4866     OUT_BCS_BATCH(batch, 0);
4867
4868     /* DW 60..62. Not used for encoder */
4869     OUT_BCS_BATCH(batch, 0);
4870     OUT_BCS_BATCH(batch, 0);
4871     OUT_BCS_BATCH(batch, 0);
4872
4873     /* DW 63..65. ILDB Not used for encoder */
4874     OUT_BCS_BATCH(batch, 0);
4875     OUT_BCS_BATCH(batch, 0);
4876     OUT_BCS_BATCH(batch, 0);
4877
4878     /* DW 66..81 For the collocated motion vector temporal buffer */
4879     if (vp9_state->picture_coding_type) {
4880         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4881         OUT_RELOC64(batch,
4882                     pak_context->res_mv_temporal_buffer[prev_index].bo,
4883                     I915_GEM_DOMAIN_INSTRUCTION, 0,
4884                     0);
4885     } else {
4886         OUT_BCS_BATCH(batch, 0);
4887         OUT_BCS_BATCH(batch, 0);
4888     }
4889
4890     for (i = 1; i < 8; i++) {
4891         OUT_BCS_BATCH(batch, 0);
4892         OUT_BCS_BATCH(batch, 0);
4893     }
4894     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4895
4896     /* DW 83..85 VP9 prob buffer */
4897     OUT_RELOC64(batch,
4898                 pak_context->res_prob_buffer.bo,
4899                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4900                 0);
4901
4902     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4903
4904     /* DW 86..88 Segment id buffer */
4905     if (pak_context->res_segmentid_buffer.bo) {
4906         OUT_RELOC64(batch,
4907                     pak_context->res_segmentid_buffer.bo,
4908                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4909                     0);
4910     } else {
4911         OUT_BCS_BATCH(batch, 0);
4912         OUT_BCS_BATCH(batch, 0);
4913     }
4914     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4915
4916     /* DW 89..91 HVD line rowstore buffer */
4917     OUT_RELOC64(batch,
4918                 pak_context->res_hvd_line_buffer.bo,
4919                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4920                 0);
4921     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4922
4923     /* DW 92..94 HVD tile line rowstore buffer */
4924     OUT_RELOC64(batch,
4925                 pak_context->res_hvd_tile_line_buffer.bo,
4926                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4927                 0);
4928     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4929
4930     /* DW 95..97 SAO streamout. Not used for VP9 */
4931     OUT_BCS_BATCH(batch, 0);
4932     OUT_BCS_BATCH(batch, 0);
4933     OUT_BCS_BATCH(batch, 0);
4934
4935     /* reserved for KBL. 98..100 */
4936     OUT_BCS_BATCH(batch, 0);
4937     OUT_BCS_BATCH(batch, 0);
4938     OUT_BCS_BATCH(batch, 0);
4939
4940     /* 101..103 */
4941     OUT_BCS_BATCH(batch, 0);
4942     OUT_BCS_BATCH(batch, 0);
4943     OUT_BCS_BATCH(batch, 0);
4944
4945     ADVANCE_BCS_BATCH(batch);
4946 }
4947
4948 static void
4949 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4950                                      struct encode_state *encode_state,
4951                                      struct intel_encoder_context *encoder_context)
4952 {
4953     struct i965_driver_data *i965 = i965_driver_data(ctx);
4954     struct intel_batchbuffer *batch = encoder_context->base.batch;
4955     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4956     struct gen9_vp9_state *vp9_state;
4957
4958     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4959
4960     /* to do */
4961     BEGIN_BCS_BATCH(batch, 29);
4962
4963     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4964
4965     /* indirect bitstream object base */
4966     OUT_BCS_BATCH(batch, 0);
4967     OUT_BCS_BATCH(batch, 0);
4968     OUT_BCS_BATCH(batch, 0);
4969     /* the upper bound of indirect bitstream object */
4970     OUT_BCS_BATCH(batch, 0);
4971     OUT_BCS_BATCH(batch, 0);
4972
4973     /* DW 6: Indirect CU object base address */
4974     OUT_RELOC64(batch,
4975                 pak_context->res_mb_code_surface.bo,
4976                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
4977                 vp9_state->mb_data_offset);
4978     /* default attribute */
4979     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4980
4981     /* DW 9..11, PAK-BSE */
4982     OUT_RELOC64(batch,
4983                 pak_context->indirect_pak_bse_object.bo,
4984                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4985                 pak_context->indirect_pak_bse_object.offset);
4986     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4987
4988     /* DW 12..13 upper bound */
4989     OUT_RELOC64(batch,
4990                 pak_context->indirect_pak_bse_object.bo,
4991                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4992                 pak_context->indirect_pak_bse_object.end_offset);
4993
4994     /* DW 14..16 compressed header buffer */
4995     OUT_RELOC64(batch,
4996                 pak_context->res_compressed_input_buffer.bo,
4997                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4998                 0);
4999     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5000
5001     /* DW 17..19 prob counter streamout */
5002     OUT_RELOC64(batch,
5003                 pak_context->res_prob_counter_buffer.bo,
5004                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5005                 0);
5006     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5007
5008     /* DW 20..22 prob delta streamin */
5009     OUT_RELOC64(batch,
5010                 pak_context->res_prob_delta_buffer.bo,
5011                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5012                 0);
5013     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5014
5015     /* DW 23..25 Tile record streamout */
5016     OUT_RELOC64(batch,
5017                 pak_context->res_tile_record_streamout_buffer.bo,
5018                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5019                 0);
5020     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5021
5022     /* DW 26..28 CU record streamout */
5023     OUT_RELOC64(batch,
5024                 pak_context->res_cu_stat_streamout_buffer.bo,
5025                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5026                 0);
5027     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5028
5029     ADVANCE_BCS_BATCH(batch);
5030 }
5031
5032 static void
5033 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5034                            struct encode_state *encode_state,
5035                            struct intel_encoder_context *encoder_context,
5036                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5037 {
5038     struct intel_batchbuffer *batch = encoder_context->base.batch;
5039     uint32_t batch_value, tmp;
5040     VAEncPictureParameterBufferVP9 *pic_param;
5041
5042     if (!encode_state->pic_param_ext ||
5043         !encode_state->pic_param_ext->buffer) {
5044         return;
5045     }
5046
5047     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5048
5049     batch_value = seg_param->seg_flags.bits.segment_reference;
5050     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5051         pic_param->pic_flags.bits.intra_only)
5052         batch_value = 0;
5053
5054     BEGIN_BCS_BATCH(batch, 8);
5055
5056     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5057     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5058     OUT_BCS_BATCH(batch,
5059                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5060                   (batch_value << 1) |
5061                   (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5062                  );
5063
5064     /* DW 3..6 is not used for encoder */
5065     OUT_BCS_BATCH(batch, 0);
5066     OUT_BCS_BATCH(batch, 0);
5067     OUT_BCS_BATCH(batch, 0);
5068     OUT_BCS_BATCH(batch, 0);
5069
5070     /* DW 7 Mode */
5071     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5072     batch_value = tmp;
5073     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5074     batch_value |= (tmp << 16);
5075     OUT_BCS_BATCH(batch, batch_value);
5076
5077     ADVANCE_BCS_BATCH(batch);
5078
5079 }
5080
5081 static void
5082 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5083                                                  struct intel_encoder_context *encoder_context,
5084                                                  struct i965_gpe_resource *obj_batch_buffer)
5085 {
5086     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5087     struct gen9_vp9_state *vp9_state;
5088     int uncompressed_header_length;
5089     unsigned int *cmd_ptr;
5090     unsigned int dw_length, bits_in_last_dw;
5091
5092     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5093
5094     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5095         return;
5096
5097     uncompressed_header_length = vp9_state->header_length;
5098     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5099
5100     if (!cmd_ptr)
5101         return;
5102
5103     bits_in_last_dw = uncompressed_header_length % 4;
5104     bits_in_last_dw *= 8;
5105
5106     if (bits_in_last_dw == 0)
5107         bits_in_last_dw = 32;
5108
5109     /* get the DWORD length of the inserted_data */
5110     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5111     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5112
5113     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5114                   (0 << 16) | /* the start offset in first DW */
5115                   (0 << 15) |
5116                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5117                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5118                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5119                   (1 << 2) |  /* last header flag. */
5120                   (0 << 1));
5121     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5122
5123     cmd_ptr += dw_length;
5124
5125     *cmd_ptr++ = MI_NOOP;
5126     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5127     i965_unmap_gpe_resource(obj_batch_buffer);
5128 }
5129
5130 static void
5131 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5132                            struct encode_state *encode_state,
5133                            struct intel_encoder_context *encoder_context)
5134 {
5135     struct intel_batchbuffer *batch = encoder_context->base.batch;
5136     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5137     struct object_surface *obj_surface;
5138     VAEncPictureParameterBufferVP9 *pic_param;
5139     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5140     struct gen9_vp9_state *vp9_state;
5141     struct gen9_surface_vp9 *vp9_priv_surface;
5142     int i;
5143     struct gen9_hcpe_pipe_mode_select_param mode_param;
5144     hcp_surface_state hcp_surface;
5145     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5146     int segment_count;
5147
5148     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5149
5150     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5151         return;
5152
5153     pic_param = vp9_state->pic_param;
5154     seg_param = vp9_state->segment_param;
5155
5156     if (vp9_state->curr_pak_pass == 0) {
5157         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5158                                                          &pak_context->res_pak_uncompressed_input_buffer);
5159
5160         // Check if driver already programmed pic state as part of BRC update kernel programming.
5161         if (!vp9_state->brc_enabled) {
5162             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5163                                                      encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5164         }
5165     }
5166
5167     if (vp9_state->curr_pak_pass == 0) {
5168         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5169     }
5170
5171     {
5172         /* copy the frame_context[frame_idx] into curr_frame_context */
5173         memcpy(&vp9_state->vp9_current_fc,
5174                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5175                sizeof(FRAME_CONTEXT));
5176         {
5177             uint8_t *prob_ptr;
5178
5179             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5180
5181             if (!prob_ptr)
5182                 return;
5183
5184             /* copy the current fc to vp9_prob buffer */
5185             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5186             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5187                 pic_param->pic_flags.bits.intra_only) {
5188                 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5189
5190                 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5191                        sizeof(vp9_kf_partition_probs));
5192                 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5193                        sizeof(vp9_kf_uv_mode_prob));
5194             }
5195             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5196         }
5197     }
5198
5199     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5200         /* read image status and insert the conditional end cmd */
5201         /* image ctrl/status is already accessed */
5202         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5203         struct vp9_encode_status_buffer_internal *status_buffer;
5204
5205         status_buffer = &vp9_state->status_buffer;
5206         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5207         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5208         mi_cond_end.bo = status_buffer->bo;
5209         mi_cond_end.compare_data = 0;
5210         mi_cond_end.compare_mask_mode_disabled = 1;
5211         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5212                                                  &mi_cond_end);
5213     }
5214
5215     mode_param.codec_mode = 1;
5216     mode_param.stream_out = 0;
5217     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5218
5219     /* reconstructed surface */
5220     memset(&hcp_surface, 0, sizeof(hcp_surface));
5221     obj_surface = encode_state->reconstructed_object;
5222     hcp_surface.dw1.surface_id = 0;
5223     hcp_surface.dw1.surface_pitch = obj_surface->width;
5224     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5225     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5226     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5227                                &hcp_surface);
5228
5229     /* Input surface */
5230     if (vp9_state->dys_in_use &&
5231         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5232          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5233         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5234         obj_surface = vp9_priv_surface->dys_surface_obj;
5235     } else {
5236         obj_surface = encode_state->input_yuv_object;
5237     }
5238
5239     hcp_surface.dw1.surface_id = 1;
5240     hcp_surface.dw1.surface_pitch = obj_surface->width;
5241     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5242     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5243     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5244                                &hcp_surface);
5245
5246     if (vp9_state->picture_coding_type) {
5247         /* Add surface for last */
5248         if (vp9_state->last_ref_obj) {
5249             obj_surface = vp9_state->last_ref_obj;
5250             hcp_surface.dw1.surface_id = 2;
5251             hcp_surface.dw1.surface_pitch = obj_surface->width;
5252             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5253             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5254             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5255                                        &hcp_surface);
5256         }
5257         if (vp9_state->golden_ref_obj) {
5258             obj_surface = vp9_state->golden_ref_obj;
5259             hcp_surface.dw1.surface_id = 3;
5260             hcp_surface.dw1.surface_pitch = obj_surface->width;
5261             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5262             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5263             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5264                                        &hcp_surface);
5265         }
5266         if (vp9_state->alt_ref_obj) {
5267             obj_surface = vp9_state->alt_ref_obj;
5268             hcp_surface.dw1.surface_id = 4;
5269             hcp_surface.dw1.surface_pitch = obj_surface->width;
5270             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5271             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5272             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5273                                        &hcp_surface);
5274         }
5275     }
5276
5277     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5278
5279     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5280
5281     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5282     memset(&second_level_batch, 0, sizeof(second_level_batch));
5283
5284     if (vp9_state->curr_pak_pass == 0) {
5285         second_level_batch.offset = 0;
5286     } else
5287         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5288
5289     second_level_batch.is_second_level = 1;
5290     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5291
5292     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5293
5294     if (pic_param->pic_flags.bits.segmentation_enabled &&
5295         seg_param)
5296         segment_count = 8;
5297     else {
5298         segment_count = 1;
5299         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5300         seg_param = &tmp_seg_param;
5301     }
5302     for (i = 0; i < segment_count; i++) {
5303         gen9_pak_vp9_segment_state(ctx, encode_state,
5304                                    encoder_context,
5305                                    &seg_param->seg_data[i], i);
5306     }
5307
5308     /* Insert the uncompressed header buffer */
5309     second_level_batch.is_second_level = 1;
5310     second_level_batch.offset = 0;
5311     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5312
5313     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5314
5315     /* PAK_OBJECT */
5316     second_level_batch.is_second_level = 1;
5317     second_level_batch.offset = 0;
5318     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5319     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5320
5321     return;
5322 }
5323
5324 static void
5325 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5326 {
5327     struct intel_batchbuffer *batch = encoder_context->base.batch;
5328     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5329     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5330     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5331     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5332     struct vp9_encode_status_buffer_internal *status_buffer;
5333     struct gen9_vp9_state *vp9_state;
5334
5335     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5336     if (!vp9_state || !pak_context || !batch)
5337         return;
5338
5339     status_buffer = &(vp9_state->status_buffer);
5340
5341     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5342     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5343
5344     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5345     mi_store_reg_mem_param.bo = status_buffer->bo;
5346     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5347     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5348     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5349
5350     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5351     mi_store_reg_mem_param.offset = 0;
5352     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5353     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5354
5355     /* Read HCP Image status */
5356     mi_store_reg_mem_param.bo = status_buffer->bo;
5357     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5358     mi_store_reg_mem_param.mmio_offset =
5359         status_buffer->vp9_image_mask_reg_offset;
5360     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5361
5362     mi_store_reg_mem_param.bo = status_buffer->bo;
5363     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5364     mi_store_reg_mem_param.mmio_offset =
5365         status_buffer->vp9_image_ctrl_reg_offset;
5366     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5367
5368     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5369     mi_store_reg_mem_param.offset = 4;
5370     mi_store_reg_mem_param.mmio_offset =
5371         status_buffer->vp9_image_ctrl_reg_offset;
5372     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5373
5374     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5375
5376     return;
5377 }
5378
5379 static VAStatus
5380 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5381                               struct encode_state *encode_state,
5382                               struct intel_encoder_context *encoder_context)
5383 {
5384     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5385     struct object_surface *obj_surface;
5386     struct object_buffer *obj_buffer;
5387     struct i965_coded_buffer_segment *coded_buffer_segment;
5388     VAEncPictureParameterBufferVP9 *pic_param;
5389     struct gen9_vp9_state *vp9_state;
5390     dri_bo *bo;
5391     int i;
5392
5393     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5394     if (!vp9_state ||
5395         !vp9_state->pic_param)
5396         return VA_STATUS_ERROR_INVALID_PARAMETER;
5397
5398     pic_param = vp9_state->pic_param;
5399
5400     /* reconstructed surface */
5401     obj_surface = encode_state->reconstructed_object;
5402     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5403
5404     dri_bo_unreference(pak_context->reconstructed_object.bo);
5405
5406     pak_context->reconstructed_object.bo = obj_surface->bo;
5407     dri_bo_reference(pak_context->reconstructed_object.bo);
5408
5409     /* set vp9 reference frames */
5410     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5411         if (pak_context->reference_surfaces[i].bo)
5412             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5413         pak_context->reference_surfaces[i].bo = NULL;
5414     }
5415
5416     /* Three reference frames are enough for VP9 */
5417     if (pic_param->pic_flags.bits.frame_type &&
5418         !pic_param->pic_flags.bits.intra_only) {
5419         for (i = 0; i < 3; i++) {
5420             obj_surface = encode_state->reference_objects[i];
5421             if (obj_surface && obj_surface->bo) {
5422                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5423                 dri_bo_reference(obj_surface->bo);
5424             }
5425         }
5426     }
5427
5428     /* input YUV surface */
5429     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5430     pak_context->uncompressed_picture_source.bo = NULL;
5431     obj_surface = encode_state->reconstructed_object;
5432     if (vp9_state->dys_in_use &&
5433         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5434          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5435         struct gen9_surface_vp9 *vp9_priv_surface =
5436             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5437         obj_surface = vp9_priv_surface->dys_surface_obj;
5438     } else
5439         obj_surface = encode_state->input_yuv_object;
5440
5441     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5442     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5443
5444     /* coded buffer */
5445     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5446     pak_context->indirect_pak_bse_object.bo = NULL;
5447     obj_buffer = encode_state->coded_buf_object;
5448     bo = obj_buffer->buffer_store->bo;
5449     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5450     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5451     pak_context->indirect_pak_bse_object.bo = bo;
5452     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5453
5454     /* set the internal flag to 0 to indicate the coded size is unknown */
5455     dri_bo_map(bo, 1);
5456     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5457     coded_buffer_segment->mapped = 0;
5458     coded_buffer_segment->codec = encoder_context->codec;
5459     coded_buffer_segment->status_support = 1;
5460     dri_bo_unmap(bo);
5461
5462     return VA_STATUS_SUCCESS;
5463 }
5464
/*
 * BRC preparation hook for the VP9 PAK stage.
 * The body is empty: no work is performed here.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    /* both arguments are intentionally unused */
    (void)encode_state;
    (void)encoder_context;
}
5470
5471 static void
5472 gen9_vp9_pak_context_destroy(void *context)
5473 {
5474     struct gen9_encoder_context_vp9 *pak_context = context;
5475     int i;
5476
5477     dri_bo_unreference(pak_context->reconstructed_object.bo);
5478     pak_context->reconstructed_object.bo = NULL;
5479
5480     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5481     pak_context->uncompressed_picture_source.bo = NULL;
5482
5483     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5484     pak_context->indirect_pak_bse_object.bo = NULL;
5485
5486     for (i = 0; i < 8; i++) {
5487         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5488         pak_context->reference_surfaces[i].bo = NULL;
5489     }
5490
5491     /* vme & pak same the same structure, so don't free the context here */
5492 }
5493
5494 static VAStatus
5495 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5496                       VAProfile profile,
5497                       struct encode_state *encode_state,
5498                       struct intel_encoder_context *encoder_context)
5499 {
5500     struct i965_driver_data *i965 = i965_driver_data(ctx);
5501     struct intel_batchbuffer *batch = encoder_context->base.batch;
5502     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5503     VAStatus va_status;
5504     struct gen9_vp9_state *vp9_state;
5505     VAEncPictureParameterBufferVP9 *pic_param;
5506     int i;
5507
5508     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5509
5510     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5511         return VA_STATUS_ERROR_INVALID_PARAMETER;
5512
5513     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5514
5515     if (va_status != VA_STATUS_SUCCESS)
5516         return va_status;
5517
5518     if (i965->intel.has_bsd2)
5519         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5520     else
5521         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5522
5523     intel_batchbuffer_emit_mi_flush(batch);
5524
5525     BEGIN_BCS_BATCH(batch, 64);
5526     for (i = 0; i < 64; i++)
5527         OUT_BCS_BATCH(batch, MI_NOOP);
5528
5529     ADVANCE_BCS_BATCH(batch);
5530
5531     for (vp9_state->curr_pak_pass = 0;
5532          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5533          vp9_state->curr_pak_pass++) {
5534
5535         if (vp9_state->curr_pak_pass == 0) {
5536             /* Initialize the VP9 Image Ctrl reg for the first pass */
5537             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5538             struct vp9_encode_status_buffer_internal *status_buffer;
5539
5540             status_buffer = &(vp9_state->status_buffer);
5541             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5542             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5543             mi_load_reg_imm.data = 0;
5544             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5545         }
5546         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5547         gen9_vp9_read_mfc_status(ctx, encoder_context);
5548     }
5549
5550     intel_batchbuffer_end_atomic(batch);
5551     intel_batchbuffer_flush(batch);
5552
5553     pic_param = vp9_state->pic_param;
5554     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5555     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5556     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5557     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5558     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5559     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5560     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5561     vp9_state->frame_number++;
5562     vp9_state->curr_mv_temporal_index ^= 1;
5563     vp9_state->first_frame = 0;
5564
5565     return VA_STATUS_SUCCESS;
5566 }
5567
5568 Bool
5569 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5570 {
5571     struct gen9_encoder_context_vp9 *vme_context = NULL;
5572     struct gen9_vp9_state *vp9_state = NULL;
5573
5574     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5575     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5576
5577     if (!vme_context || !vp9_state) {
5578         if (vme_context)
5579             free(vme_context);
5580         if (vp9_state)
5581             free(vp9_state);
5582         return false;
5583     }
5584
5585     encoder_context->enc_priv_state = vp9_state;
5586     vme_context->enc_priv_state = vp9_state;
5587
5588     /* Initialize the features that are supported by VP9 */
5589     vme_context->hme_supported = 1;
5590     vme_context->use_hw_scoreboard = 1;
5591     vme_context->use_hw_non_stalling_scoreboard = 1;
5592
5593     vp9_state->tx_mode = TX_MODE_SELECT;
5594     vp9_state->multi_ref_qp_check = 0;
5595     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5596     vp9_state->num_pak_passes = 1;
5597     vp9_state->hme_supported = vme_context->hme_supported;
5598     vp9_state->b16xme_supported = 1;
5599
5600     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5601         encoder_context->rate_control_mode != VA_RC_CQP) {
5602         vp9_state->brc_enabled = 1;
5603         vp9_state->brc_distortion_buffer_supported = 1;
5604         vp9_state->brc_constant_buffer_supported = 1;
5605         vp9_state->num_pak_passes = 4;
5606     }
5607     vp9_state->dys_enabled = 1; /* this is supported by default */
5608     vp9_state->first_frame = 1;
5609
5610     /* the definition of status buffer offset for VP9 */
5611     {
5612         struct vp9_encode_status_buffer_internal *status_buffer;
5613         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5614
5615         status_buffer = &vp9_state->status_buffer;
5616         memset(status_buffer, 0,
5617                sizeof(struct vp9_encode_status_buffer_internal));
5618
5619         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5620         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5621         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5622         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5623
5624         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5625         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5626         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5627     }
5628
5629     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5630
5631     encoder_context->vme_context = vme_context;
5632     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5633     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5634
5635     return true;
5636 }
5637
5638 static VAStatus
5639 gen9_vp9_get_coded_status(VADriverContextP ctx,
5640                           struct intel_encoder_context *encoder_context,
5641                           struct i965_coded_buffer_segment *coded_buf_seg)
5642 {
5643     struct vp9_encode_status *vp9_encode_status;
5644
5645     if (!encoder_context || !coded_buf_seg)
5646         return VA_STATUS_ERROR_INVALID_BUFFER;
5647
5648     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5649     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5650
5651     /* One VACodedBufferSegment for VP9 will be added later.
5652      * It will be linked to the next element of coded_buf_seg->base.next
5653      */
5654
5655     return VA_STATUS_SUCCESS;
5656 }
5657
5658 Bool
5659 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5660 {
5661     /* VME & PAK share the same context */
5662     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5663
5664     if (!pak_context)
5665         return false;
5666
5667     encoder_context->mfc_context = pak_context;
5668     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5669     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5670     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5671     encoder_context->get_status = gen9_vp9_get_coded_status;
5672     return true;
5673 }