OSDN Git Service

android: move the libraries to /vendor
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
/* Surface-count limit for the VP9 encoder (its users are outside this excerpt). */
#define MAX_VP9_ENCODER_SURFACES    64

/* GPE/URB configuration constants. */
#define MAX_URB_SIZE                4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT 1

/* Factor of 1000; presumably converts between kbps and bps -- confirm at the use sites. */
#define VP9_BRC_KBPS                1000

/*
 * One distinct bit per BRC kernel mode.  The suffixes suggest the
 * CBR / VBR / AVBR / CQL rate-control modes -- confirm at the use sites.
 */
#define BRC_KERNEL_CBR              0x0010
#define BRC_KERNEL_VBR              0x0020
#define BRC_KERNEL_AVBR             0x0040
#define BRC_KERNEL_CQL              0x0080

/*
 * Base size of one picture-state buffer; gen9_vp9_allocate_resources()
 * allocates VP9_PIC_STATE_BUFFER_SIZE * 4 for each pic-state resource.
 */
#define VP9_PIC_STATE_BUFFER_SIZE   192
62
/*
 * One entry of the header table located at the start of the kernel binary.
 *
 * intel_vp9_get_kernel_header_and_size() shifts kernel_start_pointer left by
 * 6 to obtain the kernel's byte offset inside the binary, i.e. the field
 * counts 64-byte units.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved             : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
67
68 typedef struct _intel_vp9_kernel_header {
69     int nKernelCount;
70     intel_kernel_header PLY_DSCALE;
71     intel_kernel_header VP9_ME_P;
72     intel_kernel_header VP9_Enc_I_32x32;
73     intel_kernel_header VP9_Enc_I_16x16;
74     intel_kernel_header VP9_Enc_P;
75     intel_kernel_header VP9_Enc_TX;
76     intel_kernel_header VP9_DYS;
77
78     intel_kernel_header VP9BRC_Intra_Distortion;
79     intel_kernel_header VP9BRC_Init;
80     intel_kernel_header VP9BRC_Reset;
81     intel_kernel_header VP9BRC_Update;
82 } intel_vp9_kernel_header;
83
/*
 * Independent bit flags for dynamic scaling (DYS).  The names suggest the
 * 1x / 4x / 16x scaling stages -- their users are outside this excerpt.
 */
#define DYS_1X_FLAG     0x01
#define DYS_4X_FLAG     0x02
#define DYS_16X_FLAG    0x04
87
/*
 * Frame dimensions passed to gen9_vp9_init_check_surfaces() and
 * gen9_vp9_check_dys_surfaces() when sizing the per-surface private data.
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* requested frame width */
    uint32_t frame_height;  /* requested frame height */
};
92
/*
 * Convert a two's-complement integer into sign-magnitude form.
 *
 * The result carries |val| in bits [0, sign_bit_pos - 2] and the sign in bit
 * (sign_bit_pos - 1).  Magnitude bits that do not fit below the sign bit are
 * dropped.
 *
 * val:          value to convert.
 * sign_bit_pos: 1-based position of the sign bit, i.e. the total width of the
 *               sign-magnitude field; must be in the range 1..32.
 *
 * All arithmetic is done on unsigned values so that sign_bit_pos == 32
 * (a shift into the int sign bit) and val == INT_MIN (negation overflow) do
 * not invoke undefined behaviour.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    const uint32_t sign_mask = (uint32_t)1 << (sign_bit_pos - 1);
    const uint32_t mag_mask = sign_mask - 1;

    if (val < 0) {
        /* 0 - (uint32_t)val yields |val| without overflowing on INT_MIN. */
        return sign_mask | ((0u - (uint32_t)val) & mag_mask);
    }

    return (uint32_t)val & mag_mask;
}
104
105 static bool
106 intel_vp9_get_kernel_header_and_size(
107     void                             *pvbinary,
108     int                              binary_size,
109     INTEL_VP9_ENC_OPERATION          operation,
110     int                              krnstate_idx,
111     struct i965_kernel               *ret_kernel)
112 {
113     typedef uint32_t BIN_PTR[4];
114
115     char *bin_start;
116     intel_vp9_kernel_header      *pkh_table;
117     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
118     int next_krnoffset;
119
120     if (!pvbinary || !ret_kernel)
121         return false;
122
123     bin_start = (char *)pvbinary;
124     pkh_table = (intel_vp9_kernel_header *)pvbinary;
125     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
126     next_krnoffset = binary_size;
127
128     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
129         pcurr_header = &pkh_table->PLY_DSCALE;
130     } else if (operation == INTEL_VP9_ENC_ME) {
131         pcurr_header = &pkh_table->VP9_ME_P;
132     } else if (operation == INTEL_VP9_ENC_MBENC) {
133         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
134     } else if (operation == INTEL_VP9_ENC_DYS) {
135         pcurr_header = &pkh_table->VP9_DYS;
136     } else if (operation == INTEL_VP9_ENC_BRC) {
137         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
138     } else {
139         return false;
140     }
141
142     pcurr_header += krnstate_idx;
143     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
144
145     pnext_header = (pcurr_header + 1);
146     if (pnext_header < pinvalid_entry) {
147         next_krnoffset = pnext_header->kernel_start_pointer << 6;
148     }
149     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
150
151     return true;
152 }
153
154
155 static void
156 gen9_free_surfaces_vp9(void **data)
157 {
158     struct gen9_surface_vp9 *vp9_surface;
159
160     if (!data || !*data)
161         return;
162
163     vp9_surface = *data;
164
165     if (vp9_surface->scaled_4x_surface_obj) {
166         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
167         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
168         vp9_surface->scaled_4x_surface_obj = NULL;
169     }
170
171     if (vp9_surface->scaled_16x_surface_obj) {
172         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
173         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
174         vp9_surface->scaled_16x_surface_obj = NULL;
175     }
176
177     if (vp9_surface->dys_4x_surface_obj) {
178         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
179         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
180         vp9_surface->dys_4x_surface_obj = NULL;
181     }
182
183     if (vp9_surface->dys_16x_surface_obj) {
184         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
185         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
186         vp9_surface->dys_16x_surface_obj = NULL;
187     }
188
189     if (vp9_surface->dys_surface_obj) {
190         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
191         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
192         vp9_surface->dys_surface_obj = NULL;
193     }
194
195     free(vp9_surface);
196
197     *data = NULL;
198
199     return;
200 }
201
202 static VAStatus
203 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
204                              struct object_surface *obj_surface,
205                              struct vp9_surface_param *surface_param)
206 {
207     struct i965_driver_data *i965 = i965_driver_data(ctx);
208     struct gen9_surface_vp9 *vp9_surface;
209     int downscaled_width_4x, downscaled_height_4x;
210     int downscaled_width_16x, downscaled_height_16x;
211
212     if (!obj_surface || !obj_surface->bo)
213         return VA_STATUS_ERROR_INVALID_SURFACE;
214
215     if (obj_surface->private_data &&
216         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
217         obj_surface->free_private_data(&obj_surface->private_data);
218         obj_surface->private_data = NULL;
219     }
220
221     if (obj_surface->private_data) {
222         /* if the frame width/height is already the same as the expected,
223          * it is unncessary to reallocate it.
224          */
225         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
226         if (vp9_surface->frame_width >= surface_param->frame_width ||
227             vp9_surface->frame_height >= surface_param->frame_height)
228             return VA_STATUS_SUCCESS;
229
230         obj_surface->free_private_data(&obj_surface->private_data);
231         obj_surface->private_data = NULL;
232         vp9_surface = NULL;
233     }
234
235     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
236
237     if (!vp9_surface)
238         return VA_STATUS_ERROR_ALLOCATION_FAILED;
239
240     vp9_surface->ctx = ctx;
241     obj_surface->private_data = vp9_surface;
242     obj_surface->free_private_data = gen9_free_surfaces_vp9;
243
244     vp9_surface->frame_width = surface_param->frame_width;
245     vp9_surface->frame_height = surface_param->frame_height;
246
247     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
248     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
249
250     i965_CreateSurfaces(ctx,
251                         downscaled_width_4x,
252                         downscaled_height_4x,
253                         VA_RT_FORMAT_YUV420,
254                         1,
255                         &vp9_surface->scaled_4x_surface_id);
256
257     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
258
259     if (!vp9_surface->scaled_4x_surface_obj) {
260         return VA_STATUS_ERROR_ALLOCATION_FAILED;
261     }
262
263     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
264                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
265
266     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
267     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
268     i965_CreateSurfaces(ctx,
269                         downscaled_width_16x,
270                         downscaled_height_16x,
271                         VA_RT_FORMAT_YUV420,
272                         1,
273                         &vp9_surface->scaled_16x_surface_id);
274     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
275
276     if (!vp9_surface->scaled_16x_surface_obj) {
277         return VA_STATUS_ERROR_ALLOCATION_FAILED;
278     }
279
280     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
281                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
282
283     return VA_STATUS_SUCCESS;
284 }
285
286 static VAStatus
287 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
288                             struct object_surface *obj_surface,
289                             struct vp9_surface_param *surface_param)
290 {
291     struct i965_driver_data *i965 = i965_driver_data(ctx);
292     struct gen9_surface_vp9 *vp9_surface;
293     int dys_width_4x, dys_height_4x;
294     int dys_width_16x, dys_height_16x;
295
296     /* As this is handled after the surface checking, it is unnecessary
297      * to check the surface bo and vp9_priv_surface again
298      */
299
300     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
301
302     if (!vp9_surface)
303         return VA_STATUS_ERROR_INVALID_SURFACE;
304
305     /* if the frame_width/height of dys_surface is the same as
306      * the expected, it is unnecessary to allocate it again
307      */
308     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
309         vp9_surface->dys_frame_width == surface_param->frame_width)
310         return VA_STATUS_SUCCESS;
311
312     if (vp9_surface->dys_4x_surface_obj) {
313         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
314         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
315         vp9_surface->dys_4x_surface_obj = NULL;
316     }
317
318     if (vp9_surface->dys_16x_surface_obj) {
319         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
320         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
321         vp9_surface->dys_16x_surface_obj = NULL;
322     }
323
324     if (vp9_surface->dys_surface_obj) {
325         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
326         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
327         vp9_surface->dys_surface_obj = NULL;
328     }
329
330     vp9_surface->dys_frame_width = surface_param->frame_width;
331     vp9_surface->dys_frame_height = surface_param->frame_height;
332
333     i965_CreateSurfaces(ctx,
334                         surface_param->frame_width,
335                         surface_param->frame_height,
336                         VA_RT_FORMAT_YUV420,
337                         1,
338                         &vp9_surface->dys_surface_id);
339     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
340
341     if (!vp9_surface->dys_surface_obj) {
342         return VA_STATUS_ERROR_ALLOCATION_FAILED;
343     }
344
345     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
346                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
347
348     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
349     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
350
351     i965_CreateSurfaces(ctx,
352                         dys_width_4x,
353                         dys_height_4x,
354                         VA_RT_FORMAT_YUV420,
355                         1,
356                         &vp9_surface->dys_4x_surface_id);
357
358     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
359
360     if (!vp9_surface->dys_4x_surface_obj) {
361         return VA_STATUS_ERROR_ALLOCATION_FAILED;
362     }
363
364     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
365                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
366
367     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
368     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
369     i965_CreateSurfaces(ctx,
370                         dys_width_16x,
371                         dys_height_16x,
372                         VA_RT_FORMAT_YUV420,
373                         1,
374                         &vp9_surface->dys_16x_surface_id);
375     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
376
377     if (!vp9_surface->dys_16x_surface_obj) {
378         return VA_STATUS_ERROR_ALLOCATION_FAILED;
379     }
380
381     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
382                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
383
384     return VA_STATUS_SUCCESS;
385 }
386
387 static VAStatus
388 gen9_vp9_allocate_resources(VADriverContextP ctx,
389                             struct encode_state *encode_state,
390                             struct intel_encoder_context *encoder_context,
391                             int allocate)
392 {
393     struct i965_driver_data *i965 = i965_driver_data(ctx);
394     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
395     struct gen9_vp9_state *vp9_state;
396     int allocate_flag, i;
397     int res_size;
398     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
399     unsigned int width, height;
400
401     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
402
403     if (!vp9_state || !vp9_state->pic_param)
404         return VA_STATUS_ERROR_INVALID_PARAMETER;
405
406     /* the buffer related with BRC is not changed. So it is allocated
407      * based on the input parameter
408      */
409     if (allocate) {
410         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
411         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
412         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
413         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
414         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
415         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
416         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
417         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
418         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
419         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
420         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
421
422         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
423                                                    &vme_context->res_brc_history_buffer,
424                                                    VP9_BRC_HISTORY_BUFFER_SIZE,
425                                                    "Brc History buffer");
426         if (!allocate_flag)
427             goto failed_allocation;
428         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
429                                                    &vme_context->res_brc_const_data_buffer,
430                                                    VP9_BRC_CONSTANTSURFACE_SIZE,
431                                                    "Brc Constant buffer");
432         if (!allocate_flag)
433             goto failed_allocation;
434
435         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
436                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
437         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
438                                                    &vme_context->res_brc_mbenc_curbe_write_buffer,
439                                                    res_size,
440                                                    "Brc Curbe write");
441         if (!allocate_flag)
442             goto failed_allocation;
443
444         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
445         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446                                                    &vme_context->res_pic_state_brc_read_buffer,
447                                                    res_size,
448                                                    "Pic State Brc_read");
449         if (!allocate_flag)
450             goto failed_allocation;
451
452         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
453         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
454                                                    &vme_context->res_pic_state_brc_write_hfw_read_buffer,
455                                                    res_size,
456                                                    "Pic State Brc_write Hfw_Read");
457         if (!allocate_flag)
458             goto failed_allocation;
459
460         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
461         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
462                                                    &vme_context->res_pic_state_hfw_write_buffer,
463                                                    res_size,
464                                                    "Pic State Hfw Write");
465         if (!allocate_flag)
466             goto failed_allocation;
467
468         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
469         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
470                                                    &vme_context->res_seg_state_brc_read_buffer,
471                                                    res_size,
472                                                    "Segment state brc_read");
473         if (!allocate_flag)
474             goto failed_allocation;
475
476         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
477         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
478                                                    &vme_context->res_seg_state_brc_write_buffer,
479                                                    res_size,
480                                                    "Segment state brc_write");
481         if (!allocate_flag)
482             goto failed_allocation;
483
484         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
485         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
486                                                    &vme_context->res_brc_bitstream_size_buffer,
487                                                    res_size,
488                                                    "Brc bitstream buffer");
489         if (!allocate_flag)
490             goto failed_allocation;
491
492         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
493         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
494                                                    &vme_context->res_brc_hfw_data_buffer,
495                                                    res_size,
496                                                    "mfw Brc data");
497         if (!allocate_flag)
498             goto failed_allocation;
499
500         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
501         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
502                                                    &vme_context->res_brc_mmdk_pak_buffer,
503                                                    res_size,
504                                                    "Brc mmdk_pak");
505         if (!allocate_flag)
506             goto failed_allocation;
507     }
508
509     /* If the width/height of allocated buffer is greater than the expected,
510      * it is unnecessary to allocate it again
511      */
512     if (vp9_state->res_width >= vp9_state->frame_width &&
513         vp9_state->res_height >= vp9_state->frame_height) {
514
515         return VA_STATUS_SUCCESS;
516     }
517     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
518     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
519     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
520
521     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
522     res_size = frame_width_in_sb * 64;
523     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
524                                                &vme_context->res_hvd_line_buffer,
525                                                res_size,
526                                                "VP9 hvd line line");
527     if (!allocate_flag)
528         goto failed_allocation;
529
530     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
531     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
532                                                &vme_context->res_hvd_tile_line_buffer,
533                                                res_size,
534                                                "VP9 hvd tile_line line");
535     if (!allocate_flag)
536         goto failed_allocation;
537
538     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
539     res_size = frame_width_in_sb * 18 * 64;
540     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541                                                &vme_context->res_deblocking_filter_line_buffer,
542                                                res_size,
543                                                "VP9 deblocking filter line");
544     if (!allocate_flag)
545         goto failed_allocation;
546
547     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
548     res_size = frame_width_in_sb * 18 * 64;
549     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
550                                                &vme_context->res_deblocking_filter_tile_line_buffer,
551                                                res_size,
552                                                "VP9 deblocking tile line");
553     if (!allocate_flag)
554         goto failed_allocation;
555
556     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
557     res_size = frame_height_in_sb * 17 * 64;
558     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
559                                                &vme_context->res_deblocking_filter_tile_col_buffer,
560                                                res_size,
561                                                "VP9 deblocking tile col");
562     if (!allocate_flag)
563         goto failed_allocation;
564
565     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
566     res_size = frame_width_in_sb * 5 * 64;
567     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
568                                                &vme_context->res_metadata_line_buffer,
569                                                res_size,
570                                                "VP9 metadata line");
571     if (!allocate_flag)
572         goto failed_allocation;
573
574     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
575     res_size = frame_width_in_sb * 5 * 64;
576     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
577                                                &vme_context->res_metadata_tile_line_buffer,
578                                                res_size,
579                                                "VP9 metadata tile line");
580     if (!allocate_flag)
581         goto failed_allocation;
582
583     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
584     res_size = frame_height_in_sb * 5 * 64;
585     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
586                                                &vme_context->res_metadata_tile_col_buffer,
587                                                res_size,
588                                                "VP9 metadata tile col");
589     if (!allocate_flag)
590         goto failed_allocation;
591
592     i965_free_gpe_resource(&vme_context->res_prob_buffer);
593     res_size = 2048;
594     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
595                                                &vme_context->res_prob_buffer,
596                                                res_size,
597                                                "VP9 prob");
598     if (!allocate_flag)
599         goto failed_allocation;
600
601     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
602     res_size = frame_sb_num * 64;
603     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
604                                                &vme_context->res_segmentid_buffer,
605                                                res_size,
606                                                "VP9 segment id");
607     if (!allocate_flag)
608         goto failed_allocation;
609
610     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
611
612     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
613     res_size = 29 * 64;
614     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
615                                                &vme_context->res_prob_delta_buffer,
616                                                res_size,
617                                                "VP9 prob delta");
618     if (!allocate_flag)
619         goto failed_allocation;
620
621     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
622
623     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
624     res_size = 29 * 64;
625     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
626                                                &vme_context->res_prob_delta_buffer,
627                                                res_size,
628                                                "VP9 prob delta");
629     if (!allocate_flag)
630         goto failed_allocation;
631
632     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
633     res_size = 32 * 64;
634     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
635                                                &vme_context->res_compressed_input_buffer,
636                                                res_size,
637                                                "VP9 compressed_input buffer");
638     if (!allocate_flag)
639         goto failed_allocation;
640
641     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
642     res_size = 193 * 64;
643     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
644                                                &vme_context->res_prob_counter_buffer,
645                                                res_size,
646                                                "VP9 prob counter");
647     if (!allocate_flag)
648         goto failed_allocation;
649
650     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
651     res_size = frame_sb_num * 64;
652     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
653                                                &vme_context->res_tile_record_streamout_buffer,
654                                                res_size,
655                                                "VP9 tile record stream_out");
656     if (!allocate_flag)
657         goto failed_allocation;
658
659     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
660     res_size = frame_sb_num * 64;
661     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
662                                                &vme_context->res_cu_stat_streamout_buffer,
663                                                res_size,
664                                                "VP9 CU stat stream_out");
665     if (!allocate_flag)
666         goto failed_allocation;
667
668     width = vp9_state->downscaled_width_4x_in_mb * 32;
669     height = vp9_state->downscaled_height_4x_in_mb * 16;
670     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
671     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
672                                                   &vme_context->s4x_memv_data_buffer,
673                                                   width, height,
674                                                   ALIGN(width, 64),
675                                                   "VP9 4x MEMV data");
676     if (!allocate_flag)
677         goto failed_allocation;
678
679     width = vp9_state->downscaled_width_4x_in_mb * 8;
680     height = vp9_state->downscaled_height_4x_in_mb * 16;
681     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
682     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
683                                                   &vme_context->s4x_memv_distortion_buffer,
684                                                   width, height,
685                                                   ALIGN(width, 64),
686                                                   "VP9 4x MEMV distorion");
687     if (!allocate_flag)
688         goto failed_allocation;
689
690     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
691     height = vp9_state->downscaled_height_16x_in_mb * 16;
692     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
693     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
694                                                   &vme_context->s16x_memv_data_buffer,
695                                                   width, height,
696                                                   width,
697                                                   "VP9 16x MEMV data");
698     if (!allocate_flag)
699         goto failed_allocation;
700
701     width = vp9_state->frame_width_in_mb * 16;
702     height = vp9_state->frame_height_in_mb * 8;
703     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
704     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
705                                                   &vme_context->res_output_16x16_inter_modes,
706                                                   width, height,
707                                                   ALIGN(width, 64),
708                                                   "VP9 output inter_mode");
709     if (!allocate_flag)
710         goto failed_allocation;
711
712     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
713                16 * 4;
714     for (i = 0; i < 2; i++) {
715         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
716         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
717                                                    &vme_context->res_mode_decision[i],
718                                                    res_size,
719                                                    "VP9 mode decision");
720         if (!allocate_flag)
721             goto failed_allocation;
722
723     }
724
725     res_size = frame_sb_num * 9 * 64;
726     for (i = 0; i < 2; i++) {
727         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
728         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
729                                                    &vme_context->res_mv_temporal_buffer[i],
730                                                    res_size,
731                                                    "VP9 temporal mv");
732         if (!allocate_flag)
733             goto failed_allocation;
734     }
735
736     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
737     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
738     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
739     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
740                                                &vme_context->res_mb_code_surface,
741                                                ALIGN(res_size, 4096),
742                                                "VP9 mb_code surface");
743     if (!allocate_flag)
744         goto failed_allocation;
745
746     res_size = 128;
747     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
748     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
749                                                &vme_context->res_pak_uncompressed_input_buffer,
750                                                ALIGN(res_size, 4096),
751                                                "VP9 pak_uncompressed_input");
752     if (!allocate_flag)
753         goto failed_allocation;
754
755     if (!vme_context->frame_header_data) {
756         /* allocate 512 bytes for generating the uncompressed header */
757         vme_context->frame_header_data = calloc(1, 512);
758     }
759
760     vp9_state->res_width = vp9_state->frame_width;
761     vp9_state->res_height = vp9_state->frame_height;
762
763     return VA_STATUS_SUCCESS;
764
765 failed_allocation:
766     return VA_STATUS_ERROR_ALLOCATION_FAILED;
767 }
768
769 static void
770 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
771 {
772     int i;
773     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
774
775     if (vp9_state->brc_enabled) {
776         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
777         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
778         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
779         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
780         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
781         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
782         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
783         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
784         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
785         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
786         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
787     }
788
789     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
790     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
791     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
792     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
793     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
794     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
795     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
796     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
797     i965_free_gpe_resource(&vme_context->res_prob_buffer);
798     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
799     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
800     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
801     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
802     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
803     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
804     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
805     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
806     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
807     for (i = 0; i < 2; i++) {
808         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
809     }
810
811     for (i = 0; i < 2; i++) {
812         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
813     }
814
815     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
816     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
817     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
818
819     if (vme_context->frame_header_data) {
820         free(vme_context->frame_header_data);
821         vme_context->frame_header_data = NULL;
822     }
823     return;
824 }
825
826 static void
827 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
828                                         struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
829                                         struct gpe_media_object_walker_parameter *walker_param)
830 {
831     memset(walker_param, 0, sizeof(*walker_param));
832
833     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
834
835     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
836     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
837
838     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
839     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
840
841     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
842     walker_param->global_outer_loop_stride.y = 0;
843
844     walker_param->global_inner_loop_unit.x = 0;
845     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
846
847     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
848     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
849
850     if (kernel_walker_param->no_dependency) {
851         walker_param->scoreboard_mask = 0;
852         walker_param->use_scoreboard = 0;
853         // Raster scan walking pattern
854         walker_param->local_outer_loop_stride.x = 0;
855         walker_param->local_outer_loop_stride.y = 1;
856         walker_param->local_inner_loop_unit.x = 1;
857         walker_param->local_inner_loop_unit.y = 0;
858         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
859         walker_param->local_end.y = 0;
860     } else {
861         walker_param->local_end.x = 0;
862         walker_param->local_end.y = 0;
863
864         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
865             // 45z degree
866             walker_param->scoreboard_mask = 0x0F;
867
868             walker_param->global_loop_exec_count = 0x3FF;
869             walker_param->local_loop_exec_count = 0x3FF;
870
871             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
872             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
873
874             walker_param->global_start.x = 0;
875             walker_param->global_start.y = 0;
876
877             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
878             walker_param->global_outer_loop_stride.y = 0;
879
880             walker_param->global_inner_loop_unit.x = 0;
881             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
882
883             walker_param->block_resolution.x = walker_param->global_resolution.x;
884             walker_param->block_resolution.y = walker_param->global_resolution.y;
885
886             walker_param->local_start.x = 0;
887             walker_param->local_start.y = 0;
888
889             walker_param->local_outer_loop_stride.x = 1;
890             walker_param->local_outer_loop_stride.y = 0;
891
892             walker_param->local_inner_loop_unit.x = -1;
893             walker_param->local_inner_loop_unit.y = 4;
894
895             walker_param->middle_loop_extra_steps = 3;
896             walker_param->mid_loop_unit_x = 0;
897             walker_param->mid_loop_unit_y = 1;
898         } else {
899             // 26 degree
900             walker_param->scoreboard_mask = 0x0F;
901             walker_param->local_outer_loop_stride.x = 1;
902             walker_param->local_outer_loop_stride.y = 0;
903             walker_param->local_inner_loop_unit.x = -2;
904             walker_param->local_inner_loop_unit.y = 1;
905         }
906     }
907 }
908
909 static void
910 gen9_run_kernel_media_object(VADriverContextP ctx,
911                              struct intel_encoder_context *encoder_context,
912                              struct i965_gpe_context *gpe_context,
913                              int media_function,
914                              struct gpe_media_object_parameter *param)
915 {
916     struct intel_batchbuffer *batch = encoder_context->base.batch;
917     struct vp9_encode_status_buffer_internal *status_buffer;
918     struct gen9_vp9_state *vp9_state;
919     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
920
921     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
922     if (!vp9_state || !batch)
923         return;
924
925     intel_batchbuffer_start_atomic(batch, 0x1000);
926
927     status_buffer = &(vp9_state->status_buffer);
928     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
929     mi_store_data_imm.bo = status_buffer->bo;
930     mi_store_data_imm.offset = status_buffer->media_index_offset;
931     mi_store_data_imm.dw0 = media_function;
932     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
933
934     intel_batchbuffer_emit_mi_flush(batch);
935     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
936     gen8_gpe_media_object(ctx, gpe_context, batch, param);
937     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
938
939     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
940
941     intel_batchbuffer_end_atomic(batch);
942
943     intel_batchbuffer_flush(batch);
944 }
945
946 static void
947 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
948                                     struct intel_encoder_context *encoder_context,
949                                     struct i965_gpe_context *gpe_context,
950                                     int media_function,
951                                     struct gpe_media_object_walker_parameter *param)
952 {
953     struct intel_batchbuffer *batch = encoder_context->base.batch;
954     struct vp9_encode_status_buffer_internal *status_buffer;
955     struct gen9_vp9_state *vp9_state;
956     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
957
958     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
959     if (!vp9_state || !batch)
960         return;
961
962     intel_batchbuffer_start_atomic(batch, 0x1000);
963
964     intel_batchbuffer_emit_mi_flush(batch);
965
966     status_buffer = &(vp9_state->status_buffer);
967     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
968     mi_store_data_imm.bo = status_buffer->bo;
969     mi_store_data_imm.offset = status_buffer->media_index_offset;
970     mi_store_data_imm.dw0 = media_function;
971     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
972
973     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
974     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
975     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
976
977     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
978
979     intel_batchbuffer_end_atomic(batch);
980
981     intel_batchbuffer_flush(batch);
982 }
983
984 static
985 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
986                             struct encode_state *encode_state,
987                             struct i965_gpe_context *gpe_context,
988                             struct intel_encoder_context *encoder_context,
989                             struct gen9_vp9_brc_curbe_param *param)
990 {
991     VAEncSequenceParameterBufferVP9 *seq_param;
992     VAEncPictureParameterBufferVP9  *pic_param;
993     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
994     vp9_brc_curbe_data      *cmd;
995     double                  dbps_ratio, dInputBitsPerFrame;
996     struct gen9_vp9_state *vp9_state;
997
998     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
999
1000     pic_param      = param->ppic_param;
1001     seq_param      = param->pseq_param;
1002     segment_param  = param->psegment_param;
1003
1004     cmd = i965_gpe_context_map_curbe(gpe_context);
1005
1006     if (!cmd)
1007         return;
1008
1009     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1010
1011     if (!vp9_state->dys_enabled) {
1012         cmd->dw0.frame_width  = pic_param->frame_width_src;
1013         cmd->dw0.frame_height = pic_param->frame_height_src;
1014     } else {
1015         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1016         cmd->dw0.frame_height = pic_param->frame_height_dst;
1017     }
1018
1019     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1020     cmd->dw1.segmentation_enable  = 0;
1021     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1022     cmd->dw1.num_tlevels          = 1;
1023
1024     switch (param->media_state_type) {
1025     case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1026         cmd->dw3.max_level_ratiot0 = 0;
1027         cmd->dw3.max_level_ratiot1 = 0;
1028         cmd->dw3.max_level_ratiot2 = 0;
1029         cmd->dw3.max_level_ratiot3 = 0;
1030
1031         cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1032                                               seq_param->max_frame_height;
1033         cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1034         cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1035         cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1036                                              VP9_BRC_KBPS;
1037         cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1038                                           VP9_BRC_KBPS;
1039         cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1040                                           VP9_BRC_KBPS;
1041         cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1042         cmd->dw11.frame_rated           = vp9_state->framerate.den;
1043
1044         cmd->dw14.avbr_accuracy         = 30;
1045         cmd->dw14.avbr_convergence      = 150;
1046
1047         if (encoder_context->rate_control_mode == VA_RC_CBR) {
1048             cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1049             cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1050             cmd->dw9.min_bit_rate  = 0;
1051         } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1052             cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1053         } else {
1054             cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1055             cmd->dw16.cq_level = 30;
1056         }
1057         cmd->dw12.gopp = seq_param->intra_period - 1;
1058
1059         cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1060         cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1061
1062         cmd->dw15.min_qp          = 0;
1063         cmd->dw15.max_qp          = 255;
1064
1065         cmd->dw16.cq_level            = 30;
1066
1067         cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1068         cmd->dw17.brc_overshoot_cbr_pct = 150;
1069
1070         dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1071         dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1072         if (dbps_ratio < 0.1)
1073             dbps_ratio = 0.1;
1074         if (dbps_ratio > 3.5)
1075             dbps_ratio = 3.5;
1076
1077         *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1078         *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1079         *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
1080
1081         cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1082         cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1083         cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1084         cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1085         cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1086         cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1087         cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1088         cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1089
1090         cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1091         cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1092         cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1093         cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1094         cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1095         cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1096         cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1097         cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1098
1099         cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1100         cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1101         cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1102         cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1103         cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1104         cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1105         cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1106         cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1107
1108         break;
1109     }
1110     case VP9_MEDIA_STATE_BRC_UPDATE: {
1111         cmd->dw15.min_qp          = 0;
1112         cmd->dw15.max_qp          = 255;
1113
1114         cmd->dw25.frame_number    = param->frame_number;
1115
1116         // Used in dynamic scaling. set to zero for now
1117         cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1118         cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1119
1120         if (pic_param->pic_flags.bits.segmentation_enabled) {
1121             cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1122             cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1123             cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1124             cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1125
1126             cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1127             cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1128             cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1129             cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1130         }
1131
1132         //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1133         cmd->dw34.temporal_id                = 0;
1134         cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1135
1136         cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1137         cmd->dw35.sync_async                 = 0;
1138         cmd->dw35.mbrc                       = param->mbbrc_enabled;
1139         if (*param->pbrc_init_current_target_buf_full_in_bits >
1140             ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1141             *param->pbrc_init_current_target_buf_full_in_bits -=
1142                 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1143             cmd->dw35.overflow = 1;
1144         } else
1145             cmd->dw35.overflow = 0;
1146
1147         cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1148
1149         cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1150
1151         *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1152
1153         cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1154         cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1155         cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1156
1157         break;
1158     }
1159     case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1160         cmd->dw2.intra_mode_disable        = 0;
1161         break;
1162     default:
1163         break;
1164     }
1165
1166     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1167     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1168     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1169     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1170     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1171     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1172     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1173     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1174     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1175     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1176     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1177     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1178     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1179     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1180
1181     i965_gpe_context_unmap_curbe(gpe_context);
1182     return;
1183 }
1184
1185 static void
1186 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1187                                      struct encode_state *encode_state,
1188                                      struct intel_encoder_context *encoder_context,
1189                                      struct i965_gpe_context *gpe_context)
1190 {
1191     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1192
1193     gen9_add_buffer_gpe_surface(ctx,
1194                                 gpe_context,
1195                                 &vme_context->res_brc_history_buffer,
1196                                 0,
1197                                 vme_context->res_brc_history_buffer.size,
1198                                 0,
1199                                 VP9_BTI_BRC_HISTORY_G9);
1200
1201     gen9_add_buffer_2d_gpe_surface(ctx,
1202                                    gpe_context,
1203                                    &vme_context->s4x_memv_distortion_buffer,
1204                                    1,
1205                                    I965_SURFACEFORMAT_R8_UNORM,
1206                                    VP9_BTI_BRC_DISTORTION_G9);
1207 }
1208
1209 /* The function related with BRC */
1210 static VAStatus
1211 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1212                                struct encode_state *encode_state,
1213                                struct intel_encoder_context *encoder_context)
1214 {
1215     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1216     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1217     struct gpe_media_object_parameter media_object_param;
1218     struct i965_gpe_context *gpe_context;
1219     int gpe_index = VP9_BRC_INIT;
1220     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1221     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1222     VAEncPictureParameterBufferVP9 *pic_param;
1223     struct gen9_vp9_state *vp9_state;
1224
1225     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1226
1227     if (!vp9_state || !vp9_state->pic_param)
1228         return VA_STATUS_ERROR_INVALID_PARAMETER;
1229
1230     pic_param = vp9_state->pic_param;
1231
1232     if (vp9_state->brc_inited)
1233         gpe_index = VP9_BRC_RESET;
1234
1235     gpe_context = &brc_context->gpe_contexts[gpe_index];
1236
1237     gen8_gpe_context_init(ctx, gpe_context);
1238     gen9_gpe_reset_binding_table(ctx, gpe_context);
1239
1240     brc_initreset_curbe.media_state_type    = media_function;
1241     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1242     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1243     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1244     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1245     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1246     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1247     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1248         &vp9_state->brc_init_current_target_buf_full_in_bits;
1249     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1250         &vp9_state->brc_init_reset_buf_size_in_bits;
1251     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1252         &vp9_state->brc_init_reset_input_bits_per_frame;
1253     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1254     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1255     brc_initreset_curbe.mbbrc_enabled      = 0;
1256     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1257
1258     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1259                                    gpe_context,
1260                                    encoder_context,
1261                                    &brc_initreset_curbe);
1262
1263     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1264     gen8_gpe_setup_interface_data(ctx, gpe_context);
1265
1266     memset(&media_object_param, 0, sizeof(media_object_param));
1267     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1268
1269     return VA_STATUS_SUCCESS;
1270 }
1271
1272 static void
1273 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1274                                      struct encode_state *encode_state,
1275                                      struct intel_encoder_context *encoder_context,
1276                                      struct i965_gpe_context *gpe_context)
1277 {
1278     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1279
1280     struct object_surface *obj_surface;
1281     struct gen9_surface_vp9 *vp9_priv_surface;
1282
1283     /* sScaled4xSurface surface */
1284     obj_surface = encode_state->reconstructed_object;
1285
1286     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1287
1288     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1289     gen9_add_2d_gpe_surface(ctx, gpe_context,
1290                             obj_surface,
1291                             0, 1,
1292                             I965_SURFACEFORMAT_R8_UNORM,
1293                             VP9_BTI_BRC_SRCY4X_G9
1294                            );
1295
1296     gen9_add_adv_gpe_surface(ctx, gpe_context,
1297                              obj_surface,
1298                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1299
1300     gen9_add_buffer_2d_gpe_surface(ctx,
1301                                    gpe_context,
1302                                    &vme_context->s4x_memv_distortion_buffer,
1303                                    1,
1304                                    I965_SURFACEFORMAT_R8_UNORM,
1305                                    VP9_BTI_BRC_DISTORTION_G9);
1306
1307     return;
1308 }
1309
1310 /* The function related with BRC */
1311 static VAStatus
1312 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1313                                struct encode_state *encode_state,
1314                                struct intel_encoder_context *encoder_context)
1315 {
1316     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1317     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1318     struct i965_gpe_context *gpe_context;
1319     int gpe_index = VP9_BRC_INTRA_DIST;
1320     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1321     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1322     VAEncPictureParameterBufferVP9 *pic_param;
1323     struct gen9_vp9_state *vp9_state;
1324     struct gpe_media_object_walker_parameter media_object_walker_param;
1325     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1326
1327     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1328
1329     if (!vp9_state || !vp9_state->pic_param)
1330         return VA_STATUS_ERROR_INVALID_PARAMETER;
1331
1332     pic_param = vp9_state->pic_param;
1333
1334     gpe_context = &brc_context->gpe_contexts[gpe_index];
1335
1336     gen8_gpe_context_init(ctx, gpe_context);
1337     gen9_gpe_reset_binding_table(ctx, gpe_context);
1338
1339     brc_intra_dist_curbe.media_state_type    = media_function;
1340     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1341     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1342     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1343     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1344     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1345     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1346     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1347         &vp9_state->brc_init_current_target_buf_full_in_bits;
1348     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1349         &vp9_state->brc_init_reset_buf_size_in_bits;
1350     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1351         &vp9_state->brc_init_reset_input_bits_per_frame;
1352     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1353     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1354     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1355     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1356
1357     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1358                                    gpe_context,
1359                                    encoder_context,
1360                                    &brc_intra_dist_curbe);
1361
1362     /* zero distortion buffer */
1363     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1364
1365     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1366     gen8_gpe_setup_interface_data(ctx, gpe_context);
1367
1368     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1369     kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1370     kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1371     kernel_walker_param.no_dependency = 1;
1372
1373     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1374
1375     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1376                                         gpe_context,
1377                                         media_function,
1378                                         &media_object_walker_param);
1379
1380     return VA_STATUS_SUCCESS;
1381 }
1382
1383 static void
1384 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1385                                          struct encode_state *encode_state,
1386                                          struct intel_encoder_context *encoder_context,
1387                                          struct i965_gpe_resource *gpe_resource)
1388 {
1389     struct gen9_vp9_state *vp9_state;
1390     VAEncPictureParameterBufferVP9 *pic_param;
1391     int frame_width_minus1, frame_height_minus1;
1392     int is_lossless = 0;
1393     int is_intra_only = 0;
1394     unsigned int last_frame_type;
1395     unsigned int ref_flags;
1396     unsigned int use_prev_frame_mvs, adapt_flag;
1397     struct gen9_surface_vp9 *vp9_surface = NULL;
1398     struct object_surface *obj_surface = NULL;
1399     uint32_t scale_h = 0;
1400     uint32_t scale_w = 0;
1401
1402     char *pdata;
1403     int i, j;
1404     unsigned int *cmd_ptr, cmd_value, tmp;
1405
1406     pdata = i965_map_gpe_resource(gpe_resource);
1407     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1408
1409     if (!vp9_state || !vp9_state->pic_param || !pdata)
1410         return;
1411
1412     pic_param = vp9_state->pic_param;
1413     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1414     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1415     if ((pic_param->luma_ac_qindex == 0) &&
1416         (pic_param->luma_dc_qindex_delta == 0) &&
1417         (pic_param->chroma_ac_qindex_delta == 0) &&
1418         (pic_param->chroma_dc_qindex_delta == 0))
1419         is_lossless = 1;
1420
1421     if (pic_param->pic_flags.bits.frame_type)
1422         is_intra_only = pic_param->pic_flags.bits.intra_only;
1423
1424     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1425
1426     use_prev_frame_mvs = 0;
1427     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1428         last_frame_type = 0;
1429         ref_flags = 0;
1430     } else {
1431         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1432                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1433                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1434                     );
1435         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1436             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1437             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1438             !pic_param->pic_flags.bits.intra_only &&
1439             vp9_state->vp9_last_frame.show_frame &&
1440             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1441              !vp9_state->vp9_last_frame.intra_only)
1442            )
1443             use_prev_frame_mvs = 1;
1444     }
1445     adapt_flag = 0;
1446     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1447         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1448         adapt_flag = 1;
1449
1450     for (i = 0; i < 4; i++) {
1451         uint32_t non_first_pass;
1452         non_first_pass = 1;
1453         if (i == 0)
1454             non_first_pass = 0;
1455
1456         cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1457
1458         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1459         *cmd_ptr++ = (frame_height_minus1 << 16 |
1460                       frame_width_minus1);
1461         /* dw2 */
1462         *cmd_ptr++ = (0 << 31 |  /* disable segment_in */
1463                       0 << 30 | /* disable segment_out */
1464                       is_lossless << 29 | /* loseless */
1465                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1466                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1467                       (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1468                       (pic_param->sharpness_level << 23) |
1469                       (pic_param->filter_level << 17) |
1470                       (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1471                       (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1472                       (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1473                       (last_frame_type << 13) |
1474                       (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1475                       (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1476                       (use_prev_frame_mvs) << 10 |
1477                       ref_flags |
1478                       (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1479                       (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1480                       (is_intra_only << 2) |
1481                       (adapt_flag << 1) |
1482                       (pic_param->pic_flags.bits.frame_type) << 0);
1483
1484         *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1485                       (0 << 24) | /* 8-bit depth */
1486                       (0 << 22) | /* only 420 format */
1487                       (0 << 0)  | /* sse statistics */
1488                       (pic_param->log2_tile_rows << 8) |
1489                       (pic_param->log2_tile_columns << 0));
1490
1491         /* dw4..6 */
1492         if (pic_param->pic_flags.bits.frame_type &&
1493             !pic_param->pic_flags.bits.intra_only) {
1494             for (j = 0; j < 3; j++) {
1495                 obj_surface = encode_state->reference_objects[j];
1496                 scale_w = 0;
1497                 scale_h = 0;
1498                 if (obj_surface && obj_surface->private_data) {
1499                     vp9_surface = obj_surface->private_data;
1500                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1501                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1502                     *cmd_ptr++ = (scale_w << 16 |
1503                                   scale_h);
1504                 } else
1505                     *cmd_ptr++ = 0;
1506             }
1507         } else {
1508             *cmd_ptr++ = 0;
1509             *cmd_ptr++ = 0;
1510             *cmd_ptr++ = 0;
1511         }
1512         /* dw7..9 */
1513         for (j = 0; j < 3; j++) {
1514             obj_surface = encode_state->reference_objects[j];
1515             vp9_surface = NULL;
1516
1517             if (obj_surface && obj_surface->private_data) {
1518                 vp9_surface = obj_surface->private_data;
1519                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1520                              (vp9_surface->frame_width - 1);
1521             } else
1522                 *cmd_ptr++ = 0;
1523         }
1524         /* dw10 */
1525         *cmd_ptr++ = 0;
1526         /* dw11 */
1527         *cmd_ptr++ = (1 << 1);
1528         *cmd_ptr++ = 0;
1529
1530         /* dw13 */
1531         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1532                       (0 << 24) | /* tail insertation */
1533                       (pic_param->luma_ac_qindex << 16) |
1534                       0 /* compressed header bin count */);
1535
1536         /* dw14 */
1537         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1538         cmd_value = (tmp << 16);
1539         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1540         cmd_value |= (tmp << 8);
1541         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1542         cmd_value |= tmp;
1543         *cmd_ptr++ = cmd_value;
1544
1545         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1546         cmd_value = tmp;
1547         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1548         cmd_value |= (tmp << 8);
1549         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1550         cmd_value |= (tmp << 16);
1551         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1552         cmd_value |= (tmp << 24);
1553         *cmd_ptr++ = cmd_value;
1554
1555         /* dw16 */
1556         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1557         cmd_value = tmp;
1558         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1559         cmd_value |= (tmp << 8);
1560         *cmd_ptr++ = cmd_value;
1561
1562         /* dw17 */
1563         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1564                      (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1565         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1566                      (vp9_state->frame_header.bit_offset_lf_level << 16);
1567
1568         /* dw19 */
1569         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1570                       non_first_pass << 16);
1571         /* dw20 */
1572         *cmd_ptr++ = (1 << 31) | (256);
1573
1574         /* dw21 */
1575         *cmd_ptr++ = (0 << 31) | 1;
1576
1577         /* dw22-dw24. Frame_delta_qindex_range */
1578         *cmd_ptr++ = 0;
1579         *cmd_ptr++ = 0;
1580         *cmd_ptr++ = 0;
1581
1582         /* dw25-26. frame_delta_lf_range */
1583         *cmd_ptr++ = 0;
1584         *cmd_ptr++ = 0;
1585
1586         /* dw27. frame_delta_lf_min */
1587         *cmd_ptr++ = 0;
1588
1589         /* dw28..30 */
1590         *cmd_ptr++ = 0;
1591         *cmd_ptr++ = 0;
1592         *cmd_ptr++ = 0;
1593
1594         /* dw31 */
1595         *cmd_ptr++ = (0 << 30) | 1;
1596         /* dw32 */
1597         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1598
1599         *cmd_ptr++ = 0;
1600         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1601     }
1602
1603     i965_unmap_gpe_resource(gpe_resource);
1604 }
1605
1606 static void
1607 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1608                                  struct encode_state *encode_state,
1609                                  struct intel_encoder_context *encoder_context,
1610                                  struct i965_gpe_context *brc_gpe_context,
1611                                  struct i965_gpe_context *mbenc_gpe_context)
1612 {
1613     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1614
1615     /* 0. BRC history buffer */
1616     gen9_add_buffer_gpe_surface(ctx,
1617                                 brc_gpe_context,
1618                                 &vme_context->res_brc_history_buffer,
1619                                 0,
1620                                 vme_context->res_brc_history_buffer.size,
1621                                 0,
1622                                 VP9_BTI_BRC_HISTORY_G9);
1623
1624     /* 1. Constant data buffer */
1625     gen9_add_buffer_gpe_surface(ctx,
1626                                 brc_gpe_context,
1627                                 &vme_context->res_brc_const_data_buffer,
1628                                 0,
1629                                 vme_context->res_brc_const_data_buffer.size,
1630                                 0,
1631                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1632
1633     /* 2. Distortion 2D surface buffer */
1634     gen9_add_buffer_2d_gpe_surface(ctx,
1635                                    brc_gpe_context,
1636                                    &vme_context->s4x_memv_distortion_buffer,
1637                                    1,
1638                                    I965_SURFACEFORMAT_R8_UNORM,
1639                                    VP9_BTI_BRC_DISTORTION_G9);
1640
1641     /* 3. pak buffer */
1642     gen9_add_buffer_gpe_surface(ctx,
1643                                 brc_gpe_context,
1644                                 &vme_context->res_brc_mmdk_pak_buffer,
1645                                 0,
1646                                 vme_context->res_brc_mmdk_pak_buffer.size,
1647                                 0,
1648                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1649     /* 4. Mbenc curbe input buffer */
1650     gen9_add_dri_buffer_gpe_surface(ctx,
1651                                     brc_gpe_context,
1652                                     mbenc_gpe_context->curbe.bo,
1653                                     0,
1654                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1655                                     mbenc_gpe_context->curbe.offset,
1656                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1657     /* 5. Mbenc curbe output buffer */
1658     gen9_add_dri_buffer_gpe_surface(ctx,
1659                                     brc_gpe_context,
1660                                     mbenc_gpe_context->curbe.bo,
1661                                     0,
1662                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1663                                     mbenc_gpe_context->curbe.offset,
1664                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1665
1666     /* 6. BRC_PIC_STATE read buffer */
1667     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1668                                 &vme_context->res_pic_state_brc_read_buffer,
1669                                 0,
1670                                 vme_context->res_pic_state_brc_read_buffer.size,
1671                                 0,
1672                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1673
1674     /* 7. BRC_PIC_STATE write buffer */
1675     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1676                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1677                                 0,
1678                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1679                                 0,
1680                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1681
1682     /* 8. SEGMENT_STATE read buffer */
1683     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1684                                 &vme_context->res_seg_state_brc_read_buffer,
1685                                 0,
1686                                 vme_context->res_seg_state_brc_read_buffer.size,
1687                                 0,
1688                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1689
1690     /* 9. SEGMENT_STATE write buffer */
1691     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1692                                 &vme_context->res_seg_state_brc_write_buffer,
1693                                 0,
1694                                 vme_context->res_seg_state_brc_write_buffer.size,
1695                                 0,
1696                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1697
1698     /* 10. Bitstream size buffer */
1699     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1700                                 &vme_context->res_brc_bitstream_size_buffer,
1701                                 0,
1702                                 vme_context->res_brc_bitstream_size_buffer.size,
1703                                 0,
1704                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1705
1706     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1707                                 &vme_context->res_brc_hfw_data_buffer,
1708                                 0,
1709                                 vme_context->res_brc_hfw_data_buffer.size,
1710                                 0,
1711                                 VP9_BTI_BRC_HFW_DATA_G9);
1712
1713     return;
1714 }
1715
1716 static VAStatus
1717 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1718                            struct encode_state *encode_state,
1719                            struct intel_encoder_context *encoder_context)
1720 {
1721     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1722     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1723     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1724     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1725     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1726     int mbenc_function;
1727     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1728     VAEncPictureParameterBufferVP9 *pic_param;
1729     struct gen9_vp9_state *vp9_state;
1730     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1731     struct gpe_media_object_parameter media_object_param;
1732
1733     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1734     if (!vp9_state || !vp9_state->pic_param)
1735         return VA_STATUS_ERROR_INVALID_PARAMETER;
1736
1737     pic_param = vp9_state->pic_param;
1738     // Setup VP9 MbEnc Curbe
1739     if (vp9_state->picture_coding_type) {
1740         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1741         mbenc_index = VP9_MBENC_IDX_INTER;
1742     } else {
1743         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1744         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1745     }
1746
1747     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1748
1749     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1750
1751     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1752     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1753     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1754     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1755     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1756     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1757     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1758     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1759     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1760     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1761     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1762     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1763     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1764     mbenc_curbe_param.media_state_type       = mbenc_function;
1765
1766     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1767                                      mbenc_gpe_context,
1768                                      encoder_context,
1769                                      &mbenc_curbe_param);
1770
1771     vp9_state->mbenc_curbe_set_in_brc_update = true;
1772
1773     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1774
1775     gen8_gpe_context_init(ctx, brc_gpe_context);
1776     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1777
1778     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1779
1780     // Setup BRC Update Curbe
1781     brc_update_curbe_param.media_state_type       = media_function;
1782     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1783     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1784     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1785     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1786     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1787     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1788     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1789     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1790     brc_update_curbe_param.b_used_ref             = 1;
1791     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1792     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1793     brc_update_curbe_param.mbbrc_enabled          = 0;
1794     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1795     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1796
1797     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1798         &vp9_state->brc_init_current_target_buf_full_in_bits;
1799     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1800         &vp9_state->brc_init_reset_buf_size_in_bits;
1801     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1802         &vp9_state->brc_init_reset_input_bits_per_frame;
1803
1804     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1805                                    brc_gpe_context,
1806                                    encoder_context,
1807                                    &brc_update_curbe_param);
1808
1809
1810     // Check if the constant data surface is present
1811     if (vp9_state->brc_constant_buffer_supported) {
1812         char *brc_const_buffer;
1813         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1814
1815         if (!brc_const_buffer)
1816             return VA_STATUS_ERROR_OPERATION_FAILED;
1817
1818         if (vp9_state->picture_coding_type)
1819             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1820                    sizeof(vp9_brc_const_data_p_g9));
1821         else
1822             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1823                    sizeof(vp9_brc_const_data_i_g9));
1824
1825         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1826     }
1827
1828     if (pic_param->pic_flags.bits.segmentation_enabled) {
1829         //reallocate the vme_state->mb_segment_map_surface
1830         /* this will be added later */
1831     }
1832
1833     {
1834         pic_param->filter_level = 0;
1835         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1836         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1837                                                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1838     }
1839
1840     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1841                                      encoder_context,
1842                                      brc_gpe_context,
1843                                      mbenc_gpe_context);
1844
1845     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1846     memset(&media_object_param, 0, sizeof(media_object_param));
1847     gen9_run_kernel_media_object(ctx, encoder_context,
1848                                  brc_gpe_context,
1849                                  media_function,
1850                                  &media_object_param);
1851     return VA_STATUS_SUCCESS;
1852 }
1853
1854 static
1855 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1856                            struct encode_state *encode_state,
1857                            struct i965_gpe_context *gpe_context,
1858                            struct intel_encoder_context *encoder_context,
1859                            struct gen9_vp9_me_curbe_param *param)
1860 {
1861     vp9_me_curbe_data        *me_cmd;
1862     int enc_media_state;
1863     int                                       me_mode;
1864     unsigned int                                       width, height;
1865     uint32_t                                  l0_ref_frames;
1866     uint32_t                                  scale_factor;
1867
1868     if (param->b16xme_enabled) {
1869         if (param->use_16x_me)
1870             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1871         else
1872             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1873     } else {
1874         me_mode = VP9_ENC_ME4X_ONLY;
1875     }
1876
1877     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1878         scale_factor = 16;
1879     else
1880         scale_factor = 4;
1881
1882     if (param->use_16x_me)
1883         enc_media_state = VP9_MEDIA_STATE_16X_ME;
1884     else
1885         enc_media_state = VP9_MEDIA_STATE_4X_ME;
1886
1887     me_cmd = i965_gpe_context_map_curbe(gpe_context);
1888
1889     if (!me_cmd)
1890         return;
1891
1892     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
1893
1894     me_cmd->dw1.max_num_mvs           = 0x10;
1895     me_cmd->dw1.bi_weight             = 0x00;
1896
1897     me_cmd->dw2.max_num_su            = 0x39;
1898     me_cmd->dw2.max_len_sp            = 0x39;
1899
1900     me_cmd->dw3.sub_mb_part_mask       = 0x77;
1901     me_cmd->dw3.inter_sad             = 0x00;
1902     me_cmd->dw3.intra_sad            = 0x00;
1903     me_cmd->dw3.bme_disable_fbr      = 0x01;
1904     me_cmd->dw3.sub_pel_mode         = 0x03;
1905
1906     width = param->frame_width / scale_factor;
1907     height = param->frame_height / scale_factor;
1908
1909     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
1910     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
1911
1912     me_cmd->dw5.ref_width            = 0x30;
1913     me_cmd->dw5.ref_height           = 0x28;
1914
1915     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1916         me_cmd->dw6.write_distortions = 0x01;
1917
1918     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1919     me_cmd->dw6.super_combine_dist    = 0x5;
1920     me_cmd->dw6.max_vmvr              = 0x7fc;
1921
1922     l0_ref_frames = (param->ref_frame_flag & 0x01) +
1923                     !!(param->ref_frame_flag & 0x02) +
1924                     !!(param->ref_frame_flag & 0x04);
1925     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1926     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
1927
1928     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1929     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1930
1931     me_cmd->dw15.mv_shift_factor        = 0x02;
1932
1933     {
1934         memcpy((void *)((char *)me_cmd + 64),
1935                vp9_diamond_ime_search_path_delta,
1936                sizeof(vp9_diamond_ime_search_path_delta));
1937     }
1938
1939
1940     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
1941     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1942     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
1943     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1944     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
1945     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
1946
1947     i965_gpe_context_unmap_curbe(gpe_context);
1948 }
1949
1950 static void
1951 gen9_vp9_send_me_surface(VADriverContextP ctx,
1952                          struct encode_state *encode_state,
1953                          struct i965_gpe_context *gpe_context,
1954                          struct intel_encoder_context *encoder_context,
1955                          struct gen9_vp9_me_surface_param *param)
1956 {
1957     struct i965_driver_data *i965 = i965_driver_data(ctx);
1958     struct object_surface *obj_surface;
1959     struct gen9_surface_vp9 *vp9_priv_surface;
1960     struct object_surface *input_surface;
1961     struct i965_gpe_resource *gpe_resource;
1962     int ref_bti;
1963
1964     obj_surface = SURFACE(param->curr_pic);
1965
1966     if (!obj_surface || !obj_surface->private_data)
1967         return;
1968
1969     vp9_priv_surface = obj_surface->private_data;
1970     if (param->use_16x_me) {
1971         gpe_resource = param->pres_16x_memv_data_buffer;
1972     } else {
1973         gpe_resource = param->pres_4x_memv_data_buffer;
1974     }
1975
1976     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1977                                    gpe_resource,
1978                                    1,
1979                                    I965_SURFACEFORMAT_R8_UNORM,
1980                                    VP9_BTI_ME_MV_DATA_SURFACE);
1981
1982     if (param->b16xme_enabled) {
1983         gpe_resource = param->pres_16x_memv_data_buffer;
1984         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1985                                        gpe_resource,
1986                                        1,
1987                                        I965_SURFACEFORMAT_R8_UNORM,
1988                                        VP9_BTI_16XME_MV_DATA_SURFACE);
1989     }
1990
1991     if (!param->use_16x_me) {
1992         gpe_resource = param->pres_me_brc_distortion_buffer;
1993
1994         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1995                                        gpe_resource,
1996                                        1,
1997                                        I965_SURFACEFORMAT_R8_UNORM,
1998                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
1999
2000         gpe_resource = param->pres_me_distortion_buffer;
2001
2002         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2003                                        gpe_resource,
2004                                        1,
2005                                        I965_SURFACEFORMAT_R8_UNORM,
2006                                        VP9_BTI_ME_DISTORTION_SURFACE);
2007     }
2008
2009     if (param->use_16x_me)
2010         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2011     else
2012         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2013
2014     gen9_add_adv_gpe_surface(ctx, gpe_context,
2015                              input_surface,
2016                              VP9_BTI_ME_CURR_PIC_L0);
2017
2018     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2019
2020
2021     if (param->last_ref_pic) {
2022         obj_surface = param->last_ref_pic;
2023         vp9_priv_surface = obj_surface->private_data;
2024
2025         if (param->use_16x_me)
2026             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2027         else
2028             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2029
2030         if (param->dys_enabled &&
2031             ((vp9_priv_surface->frame_width != param->frame_width) ||
2032              (vp9_priv_surface->frame_height != param->frame_height))) {
2033             if (param->use_16x_me)
2034                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2035             else
2036                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2037         }
2038         gen9_add_adv_gpe_surface(ctx, gpe_context,
2039                                  input_surface,
2040                                  ref_bti);
2041         gen9_add_adv_gpe_surface(ctx, gpe_context,
2042                                  input_surface,
2043                                  ref_bti + 1);
2044         ref_bti += 2;
2045     }
2046
2047     if (param->golden_ref_pic) {
2048         obj_surface = param->golden_ref_pic;
2049         vp9_priv_surface = obj_surface->private_data;
2050
2051         if (param->use_16x_me)
2052             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2053         else
2054             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2055
2056         if (param->dys_enabled &&
2057             ((vp9_priv_surface->frame_width != param->frame_width) ||
2058              (vp9_priv_surface->frame_height != param->frame_height))) {
2059             if (param->use_16x_me)
2060                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2061             else
2062                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2063         }
2064
2065         gen9_add_adv_gpe_surface(ctx, gpe_context,
2066                                  input_surface,
2067                                  ref_bti);
2068         gen9_add_adv_gpe_surface(ctx, gpe_context,
2069                                  input_surface,
2070                                  ref_bti + 1);
2071         ref_bti += 2;
2072     }
2073
2074     if (param->alt_ref_pic) {
2075         obj_surface = param->alt_ref_pic;
2076         vp9_priv_surface = obj_surface->private_data;
2077
2078         if (param->use_16x_me)
2079             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2080         else
2081             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2082
2083         if (param->dys_enabled &&
2084             ((vp9_priv_surface->frame_width != param->frame_width) ||
2085              (vp9_priv_surface->frame_height != param->frame_height))) {
2086             if (param->use_16x_me)
2087                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2088             else
2089                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2090         }
2091         gen9_add_adv_gpe_surface(ctx, gpe_context,
2092                                  input_surface,
2093                                  ref_bti);
2094         gen9_add_adv_gpe_surface(ctx, gpe_context,
2095                                  input_surface,
2096                                  ref_bti + 1);
2097         ref_bti += 2;
2098     }
2099
2100     return;
2101 }
2102
2103 static
2104 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2105                               struct encode_state *encode_state,
2106                               struct intel_encoder_context *encoder_context,
2107                               struct i965_gpe_context *gpe_context,
2108                               int use_16x_me)
2109 {
2110     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2111     struct gen9_vp9_me_surface_param  me_surface_param;
2112     struct gen9_vp9_state *vp9_state;
2113
2114     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2115
2116     /* sScaled4xSurface surface */
2117     memset(&me_surface_param, 0, sizeof(me_surface_param));
2118     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2119     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2120     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2121     me_surface_param.curr_pic = vp9_state->curr_frame;
2122     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2123     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2124     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2125     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2126
2127     if (use_16x_me) {
2128         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2129         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2130     } else {
2131         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2132         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2133     }
2134     me_surface_param.frame_width  = vp9_state->frame_width;
2135     me_surface_param.frame_height  = vp9_state->frame_height;
2136
2137     me_surface_param.use_16x_me = use_16x_me;
2138     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2139     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2140
2141     vme_context->pfn_send_me_surface(ctx, encode_state,
2142                                      gpe_context,
2143                                      encoder_context,
2144                                      &me_surface_param);
2145     return;
2146 }
2147
2148 static VAStatus
2149 gen9_vp9_me_kernel(VADriverContextP ctx,
2150                    struct encode_state *encode_state,
2151                    struct intel_encoder_context *encoder_context,
2152                    int use_16x_me)
2153 {
2154     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2155     struct i965_gpe_context *gpe_context;
2156     int media_function;
2157     struct gen9_vp9_me_curbe_param me_curbe_param;
2158     struct gen9_vp9_state *vp9_state;
2159     struct gpe_media_object_walker_parameter media_object_walker_param;
2160     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2161
2162     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2163     if (!vp9_state || !vp9_state->pic_param)
2164         return VA_STATUS_ERROR_INVALID_PARAMETER;
2165
2166     if (use_16x_me)
2167         media_function = VP9_MEDIA_STATE_16X_ME;
2168     else
2169         media_function = VP9_MEDIA_STATE_4X_ME;
2170
2171     gpe_context = &(vme_context->me_context.gpe_context);
2172
2173     gen8_gpe_context_init(ctx, gpe_context);
2174     gen9_gpe_reset_binding_table(ctx, gpe_context);
2175
2176     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2177     me_curbe_param.ppic_param = vp9_state->pic_param;
2178     me_curbe_param.pseq_param = vp9_state->seq_param;
2179     me_curbe_param.frame_width = vp9_state->frame_width;
2180     me_curbe_param.frame_height = vp9_state->frame_height;
2181     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2182     me_curbe_param.use_16x_me = use_16x_me;
2183     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2184     vme_context->pfn_set_curbe_me(ctx, encode_state,
2185                                   gpe_context,
2186                                   encoder_context,
2187                                   &me_curbe_param);
2188
2189     gen9_me_add_surfaces_vp9(ctx, encode_state,
2190                              encoder_context,
2191                              gpe_context,
2192                              use_16x_me);
2193
2194     gen8_gpe_setup_interface_data(ctx, gpe_context);
2195
2196     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2197     if (use_16x_me) {
2198         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2199         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2200     } else {
2201         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2202         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2203     }
2204     kernel_walker_param.no_dependency = 1;
2205
2206     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2207
2208     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2209                                         gpe_context,
2210                                         media_function,
2211                                         &media_object_walker_param);
2212
2213     return VA_STATUS_SUCCESS;
2214 }
2215
2216 static void
2217 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2218                               struct encode_state *encode_state,
2219                               struct i965_gpe_context *gpe_context,
2220                               struct intel_encoder_context *encoder_context,
2221                               struct gen9_vp9_scaling_curbe_param *curbe_param)
2222 {
2223     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2224
2225     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2226
2227     if (!curbe_cmd)
2228         return;
2229
2230     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2231
2232     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2233     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2234
2235     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2236     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2237
2238
2239     curbe_cmd->dw6.enable_mb_variance_output = 0;
2240     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2241     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2242
2243     if (curbe_param->mb_variance_output_enabled ||
2244         curbe_param->mb_pixel_average_output_enabled) {
2245         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2246     }
2247
2248     i965_gpe_context_unmap_curbe(gpe_context);
2249     return;
2250 }
2251
2252 static void
2253 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2254                               struct encode_state *encode_state,
2255                               struct i965_gpe_context *gpe_context,
2256                               struct intel_encoder_context *encoder_context,
2257                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2258 {
2259     vp9_bti_scaling_offset *scaling_bti;
2260     unsigned int surface_format;
2261
2262     scaling_bti = scaling_surface_param->p_scaling_bti;
2263
2264     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2265         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2266     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2267         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2268     else
2269         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2270
2271     gen9_add_2d_gpe_surface(ctx, gpe_context,
2272                             scaling_surface_param->input_surface,
2273                             0, 1, surface_format,
2274                             scaling_bti->scaling_frame_src_y);
2275
2276     gen9_add_2d_gpe_surface(ctx, gpe_context,
2277                             scaling_surface_param->output_surface,
2278                             0, 1, surface_format,
2279                             scaling_bti->scaling_frame_dst_y);
2280
2281
2282     return;
2283 }
2284
2285 static VAStatus
2286 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2287                         struct encode_state *encode_state,
2288                         struct intel_encoder_context *encoder_context,
2289                         int use_16x_scaling)
2290 {
2291     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2292     struct i965_gpe_context *gpe_context;
2293     int media_function;
2294     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2295     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2296     struct gen9_vp9_state *vp9_state;
2297     VAEncPictureParameterBufferVP9  *pic_param;
2298     struct gpe_media_object_walker_parameter media_object_walker_param;
2299     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2300     struct object_surface *obj_surface;
2301     struct object_surface *input_surface, *output_surface;
2302     struct gen9_surface_vp9 *vp9_priv_surface;
2303     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2304     unsigned int input_frame_width, input_frame_height;
2305     unsigned int output_frame_width, output_frame_height;
2306
2307     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2308     if (!vp9_state || !vp9_state->pic_param)
2309         return VA_STATUS_ERROR_INVALID_PARAMETER;
2310
2311     pic_param = vp9_state->pic_param;
2312
2313     if (use_16x_scaling)
2314         media_function = VP9_MEDIA_STATE_16X_SCALING;
2315     else
2316         media_function = VP9_MEDIA_STATE_4X_SCALING;
2317
2318     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2319
2320     gen8_gpe_context_init(ctx, gpe_context);
2321     gen9_gpe_reset_binding_table(ctx, gpe_context);
2322
2323     obj_surface = encode_state->reconstructed_object;
2324     vp9_priv_surface = obj_surface->private_data;
2325
2326     if (use_16x_scaling) {
2327         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2328         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2329
2330         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2331         input_frame_width           = vp9_state->frame_width_4x;
2332         input_frame_height          = vp9_state->frame_height_4x;
2333
2334         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2335         output_frame_width          = vp9_state->frame_width_16x;
2336         output_frame_height         = vp9_state->frame_height_16x;
2337     } else {
2338         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2339         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2340
2341         if (vp9_state->dys_in_use &&
2342             ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2343              (pic_param->frame_height_src != pic_param->frame_height_dst)))
2344             input_surface               = vp9_priv_surface->dys_surface_obj;
2345         else
2346             input_surface               = encode_state->input_yuv_object;
2347
2348         input_frame_width           = vp9_state->frame_width;
2349         input_frame_height          = vp9_state->frame_height;
2350
2351         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2352         output_frame_width          = vp9_state->frame_width_4x;
2353         output_frame_height         = vp9_state->frame_height_4x;
2354     }
2355
2356     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2357
2358     scaling_curbe_param.input_picture_width  = input_frame_width;
2359     scaling_curbe_param.input_picture_height = input_frame_height;
2360
2361     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2362     scaling_curbe_param.use_32x_scaling = 0;
2363
2364     if (use_16x_scaling)
2365         scaling_curbe_param.mb_variance_output_enabled = 0;
2366     else
2367         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2368
2369     scaling_curbe_param.blk8x8_stat_enabled = 0;
2370
2371     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2372                                        gpe_context,
2373                                        encoder_context,
2374                                        &scaling_curbe_param);
2375
2376     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2377     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2378     scaling_surface_param.input_surface                      = input_surface;
2379     scaling_surface_param.input_frame_width                  = input_frame_width;
2380     scaling_surface_param.input_frame_height                 = input_frame_height;
2381
2382     scaling_surface_param.output_surface                     = output_surface;
2383     scaling_surface_param.output_frame_width                 = output_frame_width;
2384     scaling_surface_param.output_frame_height                = output_frame_height;
2385     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2386     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2387
2388     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2389                                           gpe_context,
2390                                           encoder_context,
2391                                           &scaling_surface_param);
2392
2393     gen8_gpe_setup_interface_data(ctx, gpe_context);
2394
2395     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2396     /* the scaling is based on 8x8 blk level */
2397     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2398     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2399     kernel_walker_param.no_dependency = 1;
2400
2401     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2402
2403     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2404                                         gpe_context,
2405                                         media_function,
2406                                         &media_object_walker_param);
2407
2408     return VA_STATUS_SUCCESS;
2409 }
2410
2411 static void
2412 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2413 {
2414     struct gen9_sampler_8x8_avs                *sampler_cmd;
2415
2416     if (!gpe_context)
2417         return;
2418
2419     dri_bo_map(gpe_context->sampler.bo, 1);
2420
2421     if (!gpe_context->sampler.bo->virtual)
2422         return;
2423
2424     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2425                   (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2426
2427     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2428
2429     sampler_cmd->dw0.r3c_coefficient                      = 15;
2430     sampler_cmd->dw0.r3x_coefficient                      = 6;
2431     sampler_cmd->dw0.strong_edge_threshold                = 8;
2432     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2433     sampler_cmd->dw0.gain_factor                          = 32;
2434
2435     sampler_cmd->dw2.r5c_coefficient                     = 3;
2436     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2437     sampler_cmd->dw2.r5x_coefficient                     = 9;
2438     sampler_cmd->dw2.strong_edge_weight                  = 6;
2439     sampler_cmd->dw2.regular_weight                      = 3;
2440     sampler_cmd->dw2.non_edge_weight                     = 2;
2441     sampler_cmd->dw2.global_noise_estimation             = 255;
2442
2443     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2444     sampler_cmd->dw3.cos_alpha                           = 79;
2445     sampler_cmd->dw3.sin_alpha                           = 101;
2446
2447     sampler_cmd->dw5.diamond_du                           = 0;
2448     sampler_cmd->dw5.hs_margin                            = 3;
2449     sampler_cmd->dw5.diamond_alpha                        = 100;
2450
2451     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2452
2453     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2454
2455     sampler_cmd->dw10.y_slope2                            = 24;
2456     sampler_cmd->dw10.s0l                                 = 1792;
2457
2458     sampler_cmd->dw12.y_slope1                            = 24;
2459
2460     sampler_cmd->dw14.s0u                                = 256;
2461
2462     sampler_cmd->dw15.s2u                                = 1792;
2463     sampler_cmd->dw15.s1u                                = 0;
2464
2465     memcpy(sampler_cmd->coefficients,
2466            &gen9_vp9_avs_coeffs[0],
2467            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2468
2469     sampler_cmd->dw152.default_sharpness_level     = 255;
2470     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2471     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2472     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2473     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2474
2475     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2476     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2477     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2478
2479     memcpy(sampler_cmd->extra_coefficients,
2480            &gen9_vp9_avs_coeffs[17 * 8],
2481            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2482
2483     dri_bo_unmap(gpe_context->sampler.bo);
2484 }
2485
2486 static void
2487 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2488                        struct encode_state *encode_state,
2489                        struct i965_gpe_context *gpe_context,
2490                        struct intel_encoder_context *encoder_context,
2491                        struct gen9_vp9_dys_curbe_param *curbe_param)
2492 {
2493     vp9_dys_curbe_data  *curbe_cmd;
2494
2495     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2496
2497     if (!curbe_cmd)
2498         return;
2499
2500     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2501
2502     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2503     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2504
2505     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2506     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2507
2508     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2509     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2510
2511     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2512     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2513     curbe_cmd->dw18.avs_sample_idx            = 0;
2514
2515     i965_gpe_context_unmap_curbe(gpe_context);
2516 }
2517
2518 static void
2519 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2520                           struct encode_state *encode_state,
2521                           struct i965_gpe_context *gpe_context,
2522                           struct intel_encoder_context *encoder_context,
2523                           struct gen9_vp9_dys_surface_param *surface_param)
2524 {
2525
2526     if (surface_param->input_frame)
2527         gen9_add_adv_gpe_surface(ctx,
2528                                  gpe_context,
2529                                  surface_param->input_frame,
2530                                  VP9_BTI_DYS_INPUT_NV12);
2531
2532     if (surface_param->output_frame) {
2533         gen9_add_2d_gpe_surface(ctx,
2534                                 gpe_context,
2535                                 surface_param->output_frame,
2536                                 0,
2537                                 1,
2538                                 I965_SURFACEFORMAT_R8_UNORM,
2539                                 VP9_BTI_DYS_OUTPUT_Y);
2540
2541         gen9_add_2d_gpe_surface(ctx,
2542                                 gpe_context,
2543                                 surface_param->output_frame,
2544                                 1,
2545                                 1,
2546                                 I965_SURFACEFORMAT_R16_UINT,
2547                                 VP9_BTI_DYS_OUTPUT_UV);
2548     }
2549
2550     return;
2551 }
2552
2553 static VAStatus
2554 gen9_vp9_dys_kernel(VADriverContextP ctx,
2555                     struct encode_state *encode_state,
2556                     struct intel_encoder_context *encoder_context,
2557                     gen9_vp9_dys_kernel_param *dys_kernel_param)
2558 {
2559     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2560     struct i965_gpe_context *gpe_context;
2561     int media_function;
2562     struct gen9_vp9_dys_curbe_param                 curbe_param;
2563     struct gen9_vp9_dys_surface_param               surface_param;
2564     struct gpe_media_object_walker_parameter        media_object_walker_param;
2565     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
2566     unsigned int                                    resolution_x, resolution_y;
2567
2568     media_function = VP9_MEDIA_STATE_DYS;
2569     gpe_context = &vme_context->dys_context.gpe_context;
2570
2571     //gen8_gpe_context_init(ctx, gpe_context);
2572     gen9_gpe_reset_binding_table(ctx, gpe_context);
2573
2574     /* sampler state is configured only when initializing the GPE context */
2575
2576     memset(&curbe_param, 0, sizeof(curbe_param));
2577     curbe_param.input_width   = dys_kernel_param->input_width;
2578     curbe_param.input_height  = dys_kernel_param->input_height;
2579     curbe_param.output_width = dys_kernel_param->output_width;
2580     curbe_param.output_height = dys_kernel_param->output_height;
2581     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2582                                    gpe_context,
2583                                    encoder_context,
2584                                    &curbe_param);
2585
2586     // Add surface states
2587     memset(&surface_param, 0, sizeof(surface_param));
2588     surface_param.input_frame = dys_kernel_param->input_surface;
2589     surface_param.output_frame = dys_kernel_param->output_surface;
2590     surface_param.vert_line_stride = 0;
2591     surface_param.vert_line_stride_offset = 0;
2592
2593     vme_context->pfn_send_dys_surface(ctx,
2594                                       encode_state,
2595                                       gpe_context,
2596                                       encoder_context,
2597                                       &surface_param);
2598
2599     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2600     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2601
2602     gen8_gpe_setup_interface_data(ctx, gpe_context);
2603
2604     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2605     kernel_walker_param.resolution_x = resolution_x;
2606     kernel_walker_param.resolution_y = resolution_y;
2607     kernel_walker_param.no_dependency = 1;
2608
2609     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2610
2611     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2612                                         gpe_context,
2613                                         media_function,
2614                                         &media_object_walker_param);
2615
2616     return VA_STATUS_SUCCESS;
2617 }
2618
2619 static VAStatus
2620 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2621                            struct encode_state *encode_state,
2622                            struct intel_encoder_context *encoder_context)
2623 {
2624     struct gen9_vp9_state *vp9_state;
2625     VAEncPictureParameterBufferVP9  *pic_param;
2626     gen9_vp9_dys_kernel_param dys_kernel_param;
2627     struct object_surface *obj_surface;
2628     struct object_surface *input_surface, *output_surface;
2629     struct gen9_surface_vp9 *vp9_priv_surface;
2630
2631     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2632
2633     if (!vp9_state || !vp9_state->pic_param)
2634         return VA_STATUS_ERROR_INVALID_PARAMETER;
2635
2636     pic_param = vp9_state->pic_param;
2637
2638     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2639         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2640         input_surface = encode_state->input_yuv_object;
2641         obj_surface = encode_state->reconstructed_object;
2642         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2643         output_surface = vp9_priv_surface->dys_surface_obj;
2644
2645         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2646         dys_kernel_param.input_width = pic_param->frame_width_src;
2647         dys_kernel_param.input_height = pic_param->frame_height_src;
2648         dys_kernel_param.input_surface = input_surface;
2649         dys_kernel_param.output_width = pic_param->frame_width_dst;
2650         dys_kernel_param.output_height = pic_param->frame_height_dst;
2651         dys_kernel_param.output_surface = output_surface;
2652         gen9_vp9_dys_kernel(ctx, encode_state,
2653                             encoder_context,
2654                             &dys_kernel_param);
2655     }
2656
2657     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2658         vp9_state->last_ref_obj) {
2659         obj_surface = vp9_state->last_ref_obj;
2660         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2661
2662         input_surface = obj_surface;
2663         output_surface = vp9_priv_surface->dys_surface_obj;
2664
2665         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2666         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2667         dys_kernel_param.input_surface = input_surface;
2668
2669         dys_kernel_param.output_width = pic_param->frame_width_dst;
2670         dys_kernel_param.output_height = pic_param->frame_height_dst;
2671         dys_kernel_param.output_surface = output_surface;
2672
2673         gen9_vp9_dys_kernel(ctx, encode_state,
2674                             encoder_context,
2675                             &dys_kernel_param);
2676
2677         if (vp9_state->hme_enabled) {
2678             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2679             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2680             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2681
2682             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2683             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2684             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2685
2686             gen9_vp9_dys_kernel(ctx, encode_state,
2687                                 encoder_context,
2688                                 &dys_kernel_param);
2689
2690             /* Does it really need to do the 16x HME if the
2691              * resolution is different?
2692              * Maybe it should be restricted
2693              */
2694             if (vp9_state->b16xme_enabled) {
2695                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2696                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2697                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2698
2699                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2700                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2701                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2702
2703                 gen9_vp9_dys_kernel(ctx, encode_state,
2704                                     encoder_context,
2705                                     &dys_kernel_param);
2706             }
2707         }
2708     }
2709
2710     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2711         vp9_state->golden_ref_obj) {
2712         obj_surface = vp9_state->golden_ref_obj;
2713         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2714
2715         input_surface = obj_surface;
2716         output_surface = vp9_priv_surface->dys_surface_obj;
2717
2718         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2719         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2720         dys_kernel_param.input_surface = input_surface;
2721
2722         dys_kernel_param.output_width = pic_param->frame_width_dst;
2723         dys_kernel_param.output_height = pic_param->frame_height_dst;
2724         dys_kernel_param.output_surface = output_surface;
2725
2726         gen9_vp9_dys_kernel(ctx, encode_state,
2727                             encoder_context,
2728                             &dys_kernel_param);
2729
2730         if (vp9_state->hme_enabled) {
2731             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2732             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2733             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2734
2735             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2736             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2737             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2738
2739             gen9_vp9_dys_kernel(ctx, encode_state,
2740                                 encoder_context,
2741                                 &dys_kernel_param);
2742
2743             /* Does it really need to do the 16x HME if the
2744              * resolution is different?
2745              * Maybe it should be restricted
2746              */
2747             if (vp9_state->b16xme_enabled) {
2748                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2749                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2750                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2751
2752                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2753                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2754                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2755
2756                 gen9_vp9_dys_kernel(ctx, encode_state,
2757                                     encoder_context,
2758                                     &dys_kernel_param);
2759             }
2760         }
2761     }
2762
2763     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2764         vp9_state->alt_ref_obj) {
2765         obj_surface = vp9_state->alt_ref_obj;
2766         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2767
2768         input_surface = obj_surface;
2769         output_surface = vp9_priv_surface->dys_surface_obj;
2770
2771         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2772         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2773         dys_kernel_param.input_surface = input_surface;
2774
2775         dys_kernel_param.output_width = pic_param->frame_width_dst;
2776         dys_kernel_param.output_height = pic_param->frame_height_dst;
2777         dys_kernel_param.output_surface = output_surface;
2778
2779         gen9_vp9_dys_kernel(ctx, encode_state,
2780                             encoder_context,
2781                             &dys_kernel_param);
2782
2783         if (vp9_state->hme_enabled) {
2784             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2785             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2786             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2787
2788             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2789             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2790             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2791
2792             gen9_vp9_dys_kernel(ctx, encode_state,
2793                                 encoder_context,
2794                                 &dys_kernel_param);
2795
2796             /* Does it really need to do the 16x HME if the
2797              * resolution is different?
2798              * Maybe it should be restricted
2799              */
2800             if (vp9_state->b16xme_enabled) {
2801                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2802                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2803                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2804
2805                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2806                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2807                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2808
2809                 gen9_vp9_dys_kernel(ctx, encode_state,
2810                                     encoder_context,
2811                                     &dys_kernel_param);
2812             }
2813         }
2814     }
2815
2816     return VA_STATUS_SUCCESS;
2817 }
2818
2819 static void
2820 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2821                          struct encode_state *encode_state,
2822                          struct i965_gpe_context *gpe_context,
2823                          struct intel_encoder_context *encoder_context,
2824                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
2825 {
2826     struct gen9_vp9_state *vp9_state;
2827     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2828     vp9_mbenc_curbe_data  *curbe_cmd;
2829     VAEncPictureParameterBufferVP9  *pic_param;
2830     int i, segment_count;
2831     int seg_qindex;
2832     struct object_surface *obj_surface;
2833     struct gen9_surface_vp9 *vp9_priv_surface;
2834
2835     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2836
2837     if (!vp9_state || !vp9_state->pic_param)
2838         return;
2839
2840     pic_param = curbe_param->ppic_param;
2841     seg_param = curbe_param->psegment_param;
2842
2843     if (!seg_param) {
2844         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2845         seg_param = &tmp_seg_param;
2846     }
2847
2848     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2849
2850     if (!curbe_cmd)
2851         return;
2852
2853     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2854
2855     if (vp9_state->dys_in_use) {
2856         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2857         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2858     } else {
2859         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2860         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2861     }
2862
2863     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2864
2865     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2866     if (pic_param->pic_flags.bits.segmentation_enabled)
2867         segment_count = 8;
2868     else
2869         segment_count = 1;
2870
2871     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2872
2873     //right now set them to normal settings
2874     if (curbe_param->picture_coding_type) {
2875         switch (vp9_state->target_usage) {
2876         case INTEL_ENC_VP9_TU_QUALITY:
2877             curbe_cmd->dw1.min_16for32_check    = 0x00;
2878             curbe_cmd->dw2.multi_pred           = 0x02;
2879             curbe_cmd->dw2.len_sp               = 0x39;
2880             curbe_cmd->dw2.search_x             = 0x30;
2881             curbe_cmd->dw2.search_y             = 0x28;
2882             curbe_cmd->dw3.min_ref_for32_check = 0x01;
2883             curbe_cmd->dw4.skip16_threshold     = 0x000A;
2884             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2885
2886             memcpy(&curbe_cmd->dw16,
2887                    vp9_diamond_ime_search_path_delta,
2888                    14 * sizeof(unsigned int));
2889             break;
2890         case INTEL_ENC_VP9_TU_PERFORMANCE:
2891             curbe_cmd->dw1.min_16for32_check    = 0x02;
2892             curbe_cmd->dw2.multi_pred           = 0x00;
2893             curbe_cmd->dw2.len_sp               = 0x10;
2894             curbe_cmd->dw2.search_x             = 0x20;
2895             curbe_cmd->dw2.search_y             = 0x20;
2896             curbe_cmd->dw3.min_ref_for32_check = 0x03;
2897             curbe_cmd->dw4.skip16_threshold     = 0x0014;
2898             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2899
2900             memcpy(&curbe_cmd->dw16,
2901                    vp9_fullspiral_ime_search_path_delta,
2902                    14 * sizeof(unsigned int));
2903
2904             break;
2905         default:  // normal settings
2906             curbe_cmd->dw1.min_16for32_check     = 0x01;
2907             curbe_cmd->dw2.multi_pred           = 0x00;
2908             curbe_cmd->dw2.len_sp               = 0x19;
2909             curbe_cmd->dw2.search_x             = 0x30;
2910             curbe_cmd->dw2.search_y             = 0x28;
2911             curbe_cmd->dw3.min_ref_for32_check = 0x02;
2912             curbe_cmd->dw4.skip16_threshold     = 0x000F;
2913             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2914
2915             memcpy(&curbe_cmd->dw16,
2916                    vp9_diamond_ime_search_path_delta,
2917                    14 * sizeof(unsigned int));
2918             break;
2919         }
2920
2921         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
2922         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
2923         // co-located predictor must be disabled when dynamic scaling is enabled
2924         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
2925     }
2926
2927     curbe_cmd->dw5.inter_round = 0;
2928     curbe_cmd->dw5.intra_round = 4;
2929     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2930
2931     for (i = 0; i < segment_count; i++) {
2932         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2933                      + seg_param->seg_data[i].segment_qindex_delta;
2934
2935         seg_qindex = CLAMP(0, 255, seg_qindex);
2936
2937         if (curbe_param->picture_coding_type)
2938             memcpy(&curbe_cmd->segments[i],
2939                    &intel_vp9_costlut_p[seg_qindex * 16],
2940                    16 * sizeof(unsigned int));
2941         else
2942             memcpy(&curbe_cmd->segments[i],
2943                    &intel_vp9_costlut_key[seg_qindex * 16],
2944                    16 * sizeof(unsigned int));
2945     }
2946
2947     if (curbe_param->picture_coding_type) {
2948         if (curbe_cmd->dw3.multi_ref_qp_check) {
2949             if (curbe_param->ref_frame_flag & 0x01) {
2950                 obj_surface = curbe_param->last_ref_obj;
2951                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2952                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2953             }
2954
2955             if (curbe_param->ref_frame_flag & 0x02) {
2956                 obj_surface = curbe_param->golden_ref_obj;
2957                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2958                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2959             }
2960
2961             if (curbe_param->ref_frame_flag & 0x04) {
2962                 obj_surface = curbe_param->alt_ref_obj;
2963                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2964                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2965             }
2966         }
2967     }
2968     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
2969     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
2970     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2971     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2972     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
2973     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
2974     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
2975     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2976     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2977     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2978     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
2979     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
2980
2981     i965_gpe_context_unmap_curbe(gpe_context);
2982     return;
2983 }
2984
2985 static void
2986 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2987                             struct encode_state *encode_state,
2988                             struct i965_gpe_context *gpe_context,
2989                             struct intel_encoder_context *encoder_context,
2990                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
2991 {
2992     struct gen9_vp9_state *vp9_state;
2993     unsigned int            res_size;
2994     unsigned int            frame_width_in_sb, frame_height_in_sb;
2995     struct object_surface   *obj_surface, *tmp_input;
2996     struct gen9_surface_vp9 *vp9_priv_surface;
2997     int media_function;
2998
2999     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3000
3001     if (!vp9_state || !vp9_state->pic_param)
3002         return;
3003
3004     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3005     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3006     media_function = mbenc_param->media_state_type;
3007
3008     switch (media_function) {
3009     case VP9_MEDIA_STATE_MBENC_I_32x32: {
3010         obj_surface = mbenc_param->curr_frame_obj;
3011
3012         gen9_add_2d_gpe_surface(ctx,
3013                                 gpe_context,
3014                                 obj_surface,
3015                                 0,
3016                                 1,
3017                                 I965_SURFACEFORMAT_R8_UNORM,
3018                                 VP9_BTI_MBENC_CURR_Y_G9);
3019
3020         gen9_add_2d_gpe_surface(ctx,
3021                                 gpe_context,
3022                                 obj_surface,
3023                                 1,
3024                                 1,
3025                                 I965_SURFACEFORMAT_R16_UINT,
3026                                 VP9_BTI_MBENC_CURR_UV_G9);
3027
3028
3029         if (mbenc_param->segmentation_enabled) {
3030             gen9_add_buffer_2d_gpe_surface(ctx,
3031                                            gpe_context,
3032                                            mbenc_param->pres_segmentation_map,
3033                                            1,
3034                                            I965_SURFACEFORMAT_R8_UNORM,
3035                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3036
3037         }
3038
3039         res_size = 16 * mbenc_param->frame_width_in_mb *
3040                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3041         gen9_add_buffer_gpe_surface(ctx,
3042                                     gpe_context,
3043                                     mbenc_param->pres_mode_decision,
3044                                     0,
3045                                     res_size / 4,
3046                                     0,
3047                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3048
3049         break;
3050     }
3051     case VP9_MEDIA_STATE_MBENC_I_16x16: {
3052         obj_surface = mbenc_param->curr_frame_obj;
3053
3054         gen9_add_2d_gpe_surface(ctx,
3055                                 gpe_context,
3056                                 obj_surface,
3057                                 0,
3058                                 1,
3059                                 I965_SURFACEFORMAT_R8_UNORM,
3060                                 VP9_BTI_MBENC_CURR_Y_G9);
3061
3062         gen9_add_2d_gpe_surface(ctx,
3063                                 gpe_context,
3064                                 obj_surface,
3065                                 1,
3066                                 1,
3067                                 I965_SURFACEFORMAT_R16_UINT,
3068                                 VP9_BTI_MBENC_CURR_UV_G9);
3069
3070         gen9_add_adv_gpe_surface(ctx, gpe_context,
3071                                  obj_surface,
3072                                  VP9_BTI_MBENC_CURR_NV12_G9);
3073
3074         if (mbenc_param->segmentation_enabled) {
3075             gen9_add_buffer_2d_gpe_surface(ctx,
3076                                            gpe_context,
3077                                            mbenc_param->pres_segmentation_map,
3078                                            1,
3079                                            I965_SURFACEFORMAT_R8_UNORM,
3080                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3081
3082         }
3083
3084         res_size = 16 * mbenc_param->frame_width_in_mb *
3085                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3086         gen9_add_buffer_gpe_surface(ctx,
3087                                     gpe_context,
3088                                     mbenc_param->pres_mode_decision,
3089                                     0,
3090                                     res_size / 4,
3091                                     0,
3092                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3093
3094         res_size = 160;
3095
3096         gen9_add_dri_buffer_gpe_surface(ctx,
3097                                         gpe_context,
3098                                         mbenc_param->gpe_context_tx->curbe.bo,
3099                                         0,
3100                                         ALIGN(res_size, 64),
3101                                         mbenc_param->gpe_context_tx->curbe.offset,
3102                                         VP9_BTI_MBENC_TX_CURBE_G9);
3103
3104         break;
3105     }
3106     case VP9_MEDIA_STATE_MBENC_P: {
3107         obj_surface = mbenc_param->curr_frame_obj;
3108
3109         gen9_add_2d_gpe_surface(ctx,
3110                                 gpe_context,
3111                                 obj_surface,
3112                                 0,
3113                                 1,
3114                                 I965_SURFACEFORMAT_R8_UNORM,
3115                                 VP9_BTI_MBENC_CURR_Y_G9);
3116
3117         gen9_add_2d_gpe_surface(ctx, gpe_context,
3118                                 obj_surface,
3119                                 1,
3120                                 1,
3121                                 I965_SURFACEFORMAT_R16_UINT,
3122                                 VP9_BTI_MBENC_CURR_UV_G9);
3123
3124         gen9_add_adv_gpe_surface(ctx, gpe_context,
3125                                  obj_surface,
3126                                  VP9_BTI_MBENC_CURR_NV12_G9);
3127
3128         if (mbenc_param->last_ref_obj) {
3129             obj_surface = mbenc_param->last_ref_obj;
3130             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3131
3132             if (vp9_state->dys_in_use &&
3133                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3134                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3135                 tmp_input = vp9_priv_surface->dys_surface_obj;
3136             else
3137                 tmp_input = obj_surface;
3138
3139             gen9_add_adv_gpe_surface(ctx, gpe_context,
3140                                      tmp_input,
3141                                      VP9_BTI_MBENC_LAST_NV12_G9);
3142
3143             gen9_add_adv_gpe_surface(ctx, gpe_context,
3144                                      tmp_input,
3145                                      VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3146
3147         }
3148
3149         if (mbenc_param->golden_ref_obj) {
3150             obj_surface = mbenc_param->golden_ref_obj;
3151             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3152
3153             if (vp9_state->dys_in_use &&
3154                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3155                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3156                 tmp_input = vp9_priv_surface->dys_surface_obj;
3157             else
3158                 tmp_input = obj_surface;
3159
3160             gen9_add_adv_gpe_surface(ctx, gpe_context,
3161                                      tmp_input,
3162                                      VP9_BTI_MBENC_GOLD_NV12_G9);
3163
3164             gen9_add_adv_gpe_surface(ctx, gpe_context,
3165                                      tmp_input,
3166                                      VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3167
3168         }
3169
3170         if (mbenc_param->alt_ref_obj) {
3171             obj_surface = mbenc_param->alt_ref_obj;
3172             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3173
3174             if (vp9_state->dys_in_use &&
3175                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3176                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3177                 tmp_input = vp9_priv_surface->dys_surface_obj;
3178             else
3179                 tmp_input = obj_surface;
3180
3181             gen9_add_adv_gpe_surface(ctx, gpe_context,
3182                                      tmp_input,
3183                                      VP9_BTI_MBENC_ALTREF_NV12_G9);
3184
3185             gen9_add_adv_gpe_surface(ctx, gpe_context,
3186                                      tmp_input,
3187                                      VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3188
3189         }
3190
3191         if (mbenc_param->hme_enabled) {
3192             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3193                                            mbenc_param->ps4x_memv_data_buffer,
3194                                            1,
3195                                            I965_SURFACEFORMAT_R8_UNORM,
3196                                            VP9_BTI_MBENC_HME_MV_DATA_G9);
3197
3198             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3199                                            mbenc_param->ps4x_memv_distortion_buffer,
3200                                            1,
3201                                            I965_SURFACEFORMAT_R8_UNORM,
3202                                            VP9_BTI_MBENC_HME_DISTORTION_G9);
3203         }
3204
3205         if (mbenc_param->segmentation_enabled) {
3206             gen9_add_buffer_2d_gpe_surface(ctx,
3207                                            gpe_context,
3208                                            mbenc_param->pres_segmentation_map,
3209                                            1,
3210                                            I965_SURFACEFORMAT_R8_UNORM,
3211                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3212
3213         }
3214
3215         res_size = 16 * mbenc_param->frame_width_in_mb *
3216                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3217         gen9_add_buffer_gpe_surface(ctx,
3218                                     gpe_context,
3219                                     mbenc_param->pres_mode_decision_prev,
3220                                     0,
3221                                     res_size / 4,
3222                                     0,
3223                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3224
3225         gen9_add_buffer_gpe_surface(ctx,
3226                                     gpe_context,
3227                                     mbenc_param->pres_mode_decision,
3228                                     0,
3229                                     res_size / 4,
3230                                     0,
3231                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3232
3233         gen9_add_buffer_2d_gpe_surface(ctx,
3234                                        gpe_context,
3235                                        mbenc_param->pres_output_16x16_inter_modes,
3236                                        1,
3237                                        I965_SURFACEFORMAT_R8_UNORM,
3238                                        VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3239
3240         res_size = 160;
3241
3242         gen9_add_dri_buffer_gpe_surface(ctx,
3243                                         gpe_context,
3244                                         mbenc_param->gpe_context_tx->curbe.bo,
3245                                         0,
3246                                         ALIGN(res_size, 64),
3247                                         mbenc_param->gpe_context_tx->curbe.offset,
3248                                         VP9_BTI_MBENC_TX_CURBE_G9);
3249
3250
3251         break;
3252     }
3253     case VP9_MEDIA_STATE_MBENC_TX: {
3254         obj_surface = mbenc_param->curr_frame_obj;
3255
3256         gen9_add_2d_gpe_surface(ctx,
3257                                 gpe_context,
3258                                 obj_surface,
3259                                 0,
3260                                 1,
3261                                 I965_SURFACEFORMAT_R8_UNORM,
3262                                 VP9_BTI_MBENC_CURR_Y_G9);
3263
3264         gen9_add_2d_gpe_surface(ctx,
3265                                 gpe_context,
3266                                 obj_surface,
3267                                 1,
3268                                 1,
3269                                 I965_SURFACEFORMAT_R16_UINT,
3270                                 VP9_BTI_MBENC_CURR_UV_G9);
3271
3272         if (mbenc_param->segmentation_enabled) {
3273             gen9_add_buffer_2d_gpe_surface(ctx,
3274                                            gpe_context,
3275                                            mbenc_param->pres_segmentation_map,
3276                                            1,
3277                                            I965_SURFACEFORMAT_R8_UNORM,
3278                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3279
3280         }
3281
3282         res_size = 16 * mbenc_param->frame_width_in_mb *
3283                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3284         gen9_add_buffer_gpe_surface(ctx,
3285                                     gpe_context,
3286                                     mbenc_param->pres_mode_decision,
3287                                     0,
3288                                     res_size / 4,
3289                                     0,
3290                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3291
3292         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3293         gen9_add_buffer_gpe_surface(ctx,
3294                                     gpe_context,
3295                                     mbenc_param->pres_mb_code_surface,
3296                                     0,
3297                                     res_size / 4,
3298                                     0,
3299                                     VP9_BTI_MBENC_PAK_DATA_G9);
3300
3301         // CU Record
3302         res_size = frame_width_in_sb * frame_height_in_sb *
3303                    64 * 16 * sizeof(unsigned int);
3304
3305         gen9_add_buffer_gpe_surface(ctx,
3306                                     gpe_context,
3307                                     mbenc_param->pres_mb_code_surface,
3308                                     0,
3309                                     res_size / 4,
3310                                     mbenc_param->mb_data_offset,
3311                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3312     }
3313     default:
3314         break;
3315     }
3316
3317     return;
3318 }
3319
3320 static VAStatus
3321 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3322                       struct encode_state *encode_state,
3323                       struct intel_encoder_context *encoder_context,
3324                       int media_function)
3325 {
3326     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3327     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3328     struct gpe_media_object_walker_parameter        media_object_walker_param;
3329     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
3330     unsigned int    resolution_x, resolution_y;
3331     struct gen9_vp9_state *vp9_state;
3332     VAEncPictureParameterBufferVP9  *pic_param;
3333     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3334     struct gen9_vp9_mbenc_surface_param             surface_param;
3335     VAStatus    va_status = VA_STATUS_SUCCESS;
3336     int mbenc_gpe_index = 0;
3337     struct object_surface *obj_surface;
3338     struct gen9_surface_vp9 *vp9_priv_surface;
3339
3340     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3341
3342     if (!vp9_state || !vp9_state->pic_param)
3343         return VA_STATUS_ERROR_ENCODING_ERROR;
3344
3345     pic_param = vp9_state->pic_param;
3346
3347     switch (media_function) {
3348     case VP9_MEDIA_STATE_MBENC_I_32x32:
3349         mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3350         break;
3351
3352     case VP9_MEDIA_STATE_MBENC_I_16x16:
3353         mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3354         break;
3355
3356     case VP9_MEDIA_STATE_MBENC_P:
3357         mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3358         break;
3359
3360     case VP9_MEDIA_STATE_MBENC_TX:
3361         mbenc_gpe_index = VP9_MBENC_IDX_TX;
3362         break;
3363
3364     default:
3365         va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3366         return va_status;
3367     }
3368
3369     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3370     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3371
3372     gen9_gpe_reset_binding_table(ctx, gpe_context);
3373
3374     // Set curbe
3375     if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3376         if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3377             media_function == VP9_MEDIA_STATE_MBENC_P) {
3378             memset(&curbe_param, 0, sizeof(curbe_param));
3379             curbe_param.ppic_param            = vp9_state->pic_param;
3380             curbe_param.pseq_param            = vp9_state->seq_param;
3381             curbe_param.psegment_param        = vp9_state->segment_param;
3382             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3383             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3384             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3385             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3386             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3387             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3388             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3389             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3390             curbe_param.media_state_type      = media_function;
3391             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3392
3393             vme_context->pfn_set_curbe_mbenc(ctx,
3394                                              encode_state,
3395                                              gpe_context,
3396                                              encoder_context,
3397                                              &curbe_param);
3398         }
3399     }
3400
3401     memset(&surface_param, 0, sizeof(surface_param));
3402     surface_param.media_state_type             = media_function;
3403     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3404     surface_param.frame_width                  = vp9_state->frame_width;
3405     surface_param.frame_height                 = vp9_state->frame_height;
3406     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3407     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3408     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3409     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3410     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3411     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3412     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3413     surface_param.pres_mode_decision           =
3414         &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3415     surface_param.pres_mode_decision_prev      =
3416         &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3417     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3418     surface_param.pres_mbenc_curbe_buffer      = NULL;
3419     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3420     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3421     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3422     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3423     surface_param.gpe_context_tx               = tx_gpe_context;
3424     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3425
3426     obj_surface = encode_state->reconstructed_object;
3427     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3428     if (vp9_state->dys_in_use &&
3429         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3430          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3431         obj_surface = vp9_priv_surface->dys_surface_obj;
3432     } else
3433         obj_surface = encode_state->input_yuv_object;
3434
3435     surface_param.curr_frame_obj             = obj_surface;
3436
3437     vme_context->pfn_send_mbenc_surface(ctx,
3438                                         encode_state,
3439                                         gpe_context,
3440                                         encoder_context,
3441                                         &surface_param);
3442
3443     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3444         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3445         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3446     } else {
3447         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3448         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3449     }
3450
3451     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3452     kernel_walker_param.resolution_x = resolution_x;
3453     kernel_walker_param.resolution_y = resolution_y;
3454
3455     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3456         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3457         kernel_walker_param.use_scoreboard = 1;
3458         kernel_walker_param.no_dependency = 0;
3459         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3460     } else {
3461         kernel_walker_param.use_scoreboard = 0;
3462         kernel_walker_param.no_dependency = 1;
3463     }
3464
3465     gen8_gpe_setup_interface_data(ctx, gpe_context);
3466
3467     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3468
3469     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3470                                         gpe_context,
3471                                         media_function,
3472                                         &media_object_walker_param);
3473     return va_status;
3474 }
3475
3476 static void
3477 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3478                           struct i965_gpe_context *gpe_context,
3479                           struct vp9_encoder_kernel_parameter *kernel_param)
3480 {
3481     struct i965_driver_data *i965 = i965_driver_data(ctx);
3482
3483     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3484
3485     gpe_context->sampler.entry_size = 0;
3486     gpe_context->sampler.max_entries = 0;
3487
3488     if (kernel_param->sampler_size) {
3489         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3490         gpe_context->sampler.max_entries = 1;
3491     }
3492
3493     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3494     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3495
3496     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3497     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3498     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3499     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3500
3501     if (i965->intel.eu_total > 0)
3502         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3503     else
3504         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3505
3506     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3507     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3508     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3509                                               gpe_context->vfe_state.curbe_allocation_size -
3510                                               ((gpe_context->idrt.entry_size >> 5) *
3511                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3512     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3513     gpe_context->vfe_state.gpgpu_mode = 0;
3514 }
3515
3516 static void
3517 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3518                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3519 {
3520     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3521     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3522     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3523
3524     if (scoreboard_param->walkpat_flag) {
3525         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3526         gpe_context->vfe_desc5.scoreboard0.type = 1;
3527
3528         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3529         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3530
3531         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3532         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3533
3534         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3535         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3536
3537         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3538         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3539     } else {
3540         // Scoreboard 0
3541         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3542         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3543
3544         // Scoreboard 1
3545         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3546         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3547
3548         // Scoreboard 2
3549         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3550         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3551
3552         // Scoreboard 3
3553         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3554         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3555
3556         // Scoreboard 4
3557         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3558         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3559
3560         // Scoreboard 5
3561         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3562         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3563
3564         // Scoreboard 6
3565         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3566         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3567
3568         // Scoreboard 7
3569         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3570         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3571     }
3572 }
3573
3574 #define VP9_MI_BLOCK_MASK     0x07
3575 #define VP9_VME_REF_WIN       48
3576
3577 static VAStatus
3578 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3579                                 struct encode_state *encode_state,
3580                                 struct intel_encoder_context *encoder_context)
3581 {
3582     struct i965_driver_data *i965 = i965_driver_data(ctx);
3583     struct gen9_vp9_state *vp9_state;
3584     VAEncPictureParameterBufferVP9  *pic_param;
3585     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3586     VAEncSequenceParameterBufferVP9 *seq_param;
3587     struct object_surface *obj_surface;
3588     struct object_buffer *obj_buffer;
3589     struct gen9_surface_vp9 *vp9_priv_surface;
3590
3591     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3592
3593     if (!encode_state->pic_param_ext ||
3594         !encode_state->pic_param_ext->buffer) {
3595         return VA_STATUS_ERROR_INVALID_PARAMETER;
3596     }
3597     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3598
3599     if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
3600         pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
3601         pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
3602         pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
3603         return VA_STATUS_ERROR_INVALID_PARAMETER;
3604
3605     obj_buffer = BUFFER(pic_param->coded_buf);
3606
3607     if (!obj_buffer ||
3608         !obj_buffer->buffer_store ||
3609         !obj_buffer->buffer_store->bo)
3610         return VA_STATUS_ERROR_INVALID_PARAMETER;
3611
3612     encode_state->coded_buf_object = obj_buffer;
3613
3614     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3615
3616     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3617
3618     if (!encode_state->reconstructed_object ||
3619         !encode_state->input_yuv_object)
3620         return VA_STATUS_ERROR_INVALID_PARAMETER;
3621
3622     vp9_state->curr_frame = pic_param->reconstructed_frame;
3623     vp9_state->ref_frame_flag = 0;
3624     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3625         pic_param->pic_flags.bits.intra_only) {
3626         /* this will be regarded as I-frame type */
3627         vp9_state->picture_coding_type = 0;
3628         vp9_state->last_ref_obj = NULL;
3629         vp9_state->golden_ref_obj = NULL;
3630         vp9_state->alt_ref_obj = NULL;
3631     } else {
3632         vp9_state->picture_coding_type = 1;
3633         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3634                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3635
3636         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3637         vp9_state->last_ref_obj = obj_surface;
3638         if (!obj_surface ||
3639             !obj_surface->bo ||
3640             !obj_surface->private_data) {
3641             vp9_state->last_ref_obj = NULL;
3642             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3643         }
3644
3645         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3646         vp9_state->golden_ref_obj = obj_surface;
3647         if (!obj_surface ||
3648             !obj_surface->bo ||
3649             !obj_surface->private_data) {
3650             vp9_state->golden_ref_obj = NULL;
3651             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3652         }
3653
3654         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3655         vp9_state->alt_ref_obj = obj_surface;
3656         if (!obj_surface ||
3657             !obj_surface->bo ||
3658             !obj_surface->private_data) {
3659             vp9_state->alt_ref_obj = NULL;
3660             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3661         }
3662
3663         /* remove the duplicated flag and ref frame list */
3664         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3665             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3666                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3667                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3668                 vp9_state->golden_ref_obj = NULL;
3669             }
3670
3671             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3672                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3673                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3674                 vp9_state->alt_ref_obj = NULL;
3675             }
3676         }
3677
3678         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3679             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3680                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3681                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3682                 vp9_state->alt_ref_obj = NULL;
3683             }
3684         }
3685
3686         if (vp9_state->ref_frame_flag == 0)
3687             return VA_STATUS_ERROR_INVALID_PARAMETER;
3688     }
3689
3690     seg_param = NULL;
3691     if (pic_param->pic_flags.bits.segmentation_enabled) {
3692         if (!encode_state->q_matrix ||
3693             !encode_state->q_matrix->buffer) {
3694             return VA_STATUS_ERROR_INVALID_PARAMETER;
3695         }
3696         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3697                     encode_state->q_matrix->buffer;
3698     }
3699
3700     seq_param = NULL;
3701     if (encode_state->seq_param_ext &&
3702         encode_state->seq_param_ext->buffer)
3703         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3704
3705     if (!seq_param) {
3706         seq_param = &vp9_state->bogus_seq_param;
3707     }
3708
3709     vp9_state->pic_param = pic_param;
3710     vp9_state->segment_param = seg_param;
3711     vp9_state->seq_param = seq_param;
3712
3713     obj_surface = encode_state->reconstructed_object;
3714     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3715         pic_param->frame_height_dst > obj_surface->orig_height)
3716         return VA_STATUS_ERROR_INVALID_SURFACE;
3717
3718     if (!vp9_state->dys_enabled &&
3719         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3720          (pic_param->frame_height_src != pic_param->frame_height_dst)))
3721         return VA_STATUS_ERROR_UNIMPLEMENTED;
3722
3723     if (vp9_state->brc_enabled) {
3724         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3725             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3726
3727             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3728                 !encoder_context->brc.bits_per_second[0])
3729                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3730
3731             vp9_state->gop_size = encoder_context->brc.gop_size;
3732             vp9_state->framerate = encoder_context->brc.framerate[0];
3733
3734             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3735                 !encoder_context->brc.target_percentage[0]) {
3736                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3737                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3738                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3739             } else {
3740                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3741                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3742                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3743                     vp9_state->min_bit_rate = 0;
3744                 else
3745                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3746             }
3747
3748             if (encoder_context->brc.hrd_buffer_size)
3749                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3750             else if (encoder_context->brc.window_size)
3751                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3752             else
3753                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3754             if (encoder_context->brc.hrd_initial_buffer_fullness)
3755                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3756             else
3757                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3758         }
3759     }
3760
3761     vp9_state->frame_width = pic_param->frame_width_dst;
3762     vp9_state->frame_height = pic_param->frame_height_dst;
3763
3764     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3765     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3766
3767     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3768     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3769
3770     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3771     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3772
3773     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3774     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3775     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3776     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3777
3778     vp9_state->dys_in_use = 0;
3779     if (pic_param->frame_width_src != pic_param->frame_width_dst ||
3780         pic_param->frame_height_src != pic_param->frame_height_dst)
3781         vp9_state->dys_in_use = 1;
3782     vp9_state->dys_ref_frame_flag = 0;
3783     /* check the dys setting. The dys is supported by default. */
3784     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3785         !pic_param->pic_flags.bits.intra_only) {
3786         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3787
3788         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3789             vp9_state->last_ref_obj) {
3790             obj_surface = vp9_state->last_ref_obj;
3791             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3792
3793             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3794                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3795                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3796         }
3797         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3798             vp9_state->golden_ref_obj) {
3799             obj_surface = vp9_state->golden_ref_obj;
3800             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3801
3802             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3803                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3804                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3805         }
3806         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3807             vp9_state->alt_ref_obj) {
3808             obj_surface = vp9_state->alt_ref_obj;
3809             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3810
3811             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3812                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3813                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3814         }
3815         if (vp9_state->dys_ref_frame_flag)
3816             vp9_state->dys_in_use = 1;
3817     }
3818
3819     if (vp9_state->hme_supported) {
3820         vp9_state->hme_enabled = 1;
3821     } else {
3822         vp9_state->hme_enabled = 0;
3823     }
3824
3825     if (vp9_state->b16xme_supported) {
3826         vp9_state->b16xme_enabled = 1;
3827     } else {
3828         vp9_state->b16xme_enabled = 0;
3829     }
3830
3831     /* disable HME/16xME if the size is too small */
3832     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3833         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3834         vp9_state->hme_enabled = 0;
3835         vp9_state->b16xme_enabled = 0;
3836     }
3837
3838     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3839         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3840         vp9_state->b16xme_enabled = 0;
3841
3842     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3843         pic_param->pic_flags.bits.intra_only) {
3844         vp9_state->hme_enabled = 0;
3845         vp9_state->b16xme_enabled = 0;
3846     }
3847
3848     vp9_state->mbenc_keyframe_dist_enabled = 0;
3849     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3850         vp9_state->brc_distortion_buffer_supported)
3851         vp9_state->mbenc_keyframe_dist_enabled = 1;
3852
3853     return VA_STATUS_SUCCESS;
3854 }
3855
3856 static VAStatus
3857 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3858                                 struct encode_state *encode_state,
3859                                 struct intel_encoder_context *encoder_context)
3860 {
3861     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3862     struct vp9_surface_param surface_param;
3863     struct gen9_vp9_state *vp9_state;
3864     VAEncPictureParameterBufferVP9  *pic_param;
3865     struct object_surface *obj_surface;
3866     struct gen9_surface_vp9 *vp9_surface;
3867     int driver_header_flag = 0;
3868     VAStatus va_status;
3869
3870     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3871
3872     if (!vp9_state || !vp9_state->pic_param)
3873         return VA_STATUS_ERROR_INVALID_PARAMETER;
3874
3875     pic_param = vp9_state->pic_param;
3876
3877     /* this is to check whether the driver should generate the uncompressed header */
3878     driver_header_flag = 1;
3879     if (encode_state->packed_header_data_ext &&
3880         encode_state->packed_header_data_ext[0] &&
3881         pic_param->bit_offset_first_partition_size) {
3882         VAEncPackedHeaderParameterBuffer *param = NULL;
3883
3884         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3885
3886         if (param->type == VAEncPackedHeaderRawData) {
3887             char *header_data;
3888             unsigned int length_in_bits;
3889
3890             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3891             length_in_bits = param->bit_length;
3892             driver_header_flag = 0;
3893
3894             vp9_state->frame_header.bit_offset_first_partition_size =
3895                 pic_param->bit_offset_first_partition_size;
3896             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3897             vp9_state->alias_insert_data = header_data;
3898
3899             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3900             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3901             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3902             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3903             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3904             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3905         }
3906     }
3907
3908     if (driver_header_flag) {
3909         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3910         intel_write_uncompressed_header(encode_state,
3911                                         VAProfileVP9Profile0,
3912                                         vme_context->frame_header_data,
3913                                         &vp9_state->header_length,
3914                                         &vp9_state->frame_header);
3915         vp9_state->alias_insert_data = vme_context->frame_header_data;
3916     }
3917
3918     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3919                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3920     if (va_status != VA_STATUS_SUCCESS)
3921         return va_status;
3922
3923     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3924                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3925
3926     if (va_status != VA_STATUS_SUCCESS)
3927         return va_status;
3928
3929     surface_param.frame_width = vp9_state->frame_width;
3930     surface_param.frame_height = vp9_state->frame_height;
3931     va_status = gen9_vp9_init_check_surfaces(ctx,
3932                                              encode_state->reconstructed_object,
3933                                              &surface_param);
3934
3935     {
3936         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3937
3938         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3939     }
3940     if (vp9_state->dys_in_use &&
3941         (pic_param->frame_width_src != pic_param->frame_width_dst ||
3942          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3943         surface_param.frame_width = pic_param->frame_width_dst;
3944         surface_param.frame_height = pic_param->frame_height_dst;
3945         va_status = gen9_vp9_check_dys_surfaces(ctx,
3946                                                 encode_state->reconstructed_object,
3947                                                 &surface_param);
3948
3949         if (va_status)
3950             return va_status;
3951     }
3952
3953     if (vp9_state->dys_ref_frame_flag) {
3954         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3955             vp9_state->last_ref_obj) {
3956             obj_surface = vp9_state->last_ref_obj;
3957             surface_param.frame_width = vp9_state->frame_width;
3958             surface_param.frame_height = vp9_state->frame_height;
3959             va_status = gen9_vp9_check_dys_surfaces(ctx,
3960                                                     obj_surface,
3961                                                     &surface_param);
3962
3963             if (va_status)
3964                 return va_status;
3965         }
3966         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3967             vp9_state->golden_ref_obj) {
3968             obj_surface = vp9_state->golden_ref_obj;
3969             surface_param.frame_width = vp9_state->frame_width;
3970             surface_param.frame_height = vp9_state->frame_height;
3971             va_status = gen9_vp9_check_dys_surfaces(ctx,
3972                                                     obj_surface,
3973                                                     &surface_param);
3974
3975             if (va_status)
3976                 return va_status;
3977         }
3978         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3979             vp9_state->alt_ref_obj) {
3980             obj_surface = vp9_state->alt_ref_obj;
3981             surface_param.frame_width = vp9_state->frame_width;
3982             surface_param.frame_height = vp9_state->frame_height;
3983             va_status = gen9_vp9_check_dys_surfaces(ctx,
3984                                                     obj_surface,
3985                                                     &surface_param);
3986
3987             if (va_status)
3988                 return va_status;
3989         }
3990     }
3991
3992     if (va_status != VA_STATUS_SUCCESS)
3993         return va_status;
3994     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
3995
3996     return VA_STATUS_SUCCESS;
3997 }
3998
3999 static VAStatus
4000 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4001                              struct encode_state *encode_state,
4002                              struct intel_encoder_context *encoder_context)
4003 {
4004     struct i965_driver_data *i965 = i965_driver_data(ctx);
4005     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4006     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4007     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4008     struct gpe_dynamic_state_parameter ds_param;
4009     int i;
4010
4011     /*
4012      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4013      * MBEnc first
4014      */
4015     for (i = 0; i < NUM_VP9_MBENC; i++) {
4016         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4017     }
4018
4019     /*
4020      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4021      * curbe_buffer.
4022      */
4023     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4024                        ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4025     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4026                                                "mbenc_dys",
4027                                                ds_param.bo_size,
4028                                                0x1000);
4029     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4030
4031     ds_param.bo = mbenc_context->mbenc_bo_dys;
4032     ds_param.curbe_offset = 0;
4033     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4034     for (i = 0; i < NUM_VP9_MBENC; i++) {
4035         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4036                                ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4037
4038         gen8_gpe_context_set_dynamic_buffer(ctx,
4039                                             &mbenc_context->gpe_contexts[i],
4040                                             &ds_param);
4041     }
4042
4043     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4044     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4045
4046     return VA_STATUS_SUCCESS;
4047 }
4048
4049 static VAStatus
4050 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4051                               struct encode_state *encode_state,
4052                               struct intel_encoder_context *encoder_context)
4053 {
4054     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4055     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4056
4057     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4058     mbenc_context->mbenc_bo_dys = NULL;
4059
4060     return VA_STATUS_SUCCESS;
4061 }
4062
4063 static VAStatus
4064 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4065                             struct encode_state *encode_state,
4066                             struct intel_encoder_context *encoder_context)
4067 {
4068     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4069     struct gen9_vp9_state *vp9_state;
4070     int i;
4071
4072     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4073
4074     if (!vp9_state || !vp9_state->pic_param)
4075         return VA_STATUS_ERROR_INVALID_PARAMETER;
4076
4077     if (vp9_state->dys_in_use) {
4078         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4079     }
4080
4081     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4082         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4083     }
4084
4085     if (vp9_state->picture_coding_type == KEY_FRAME) {
4086         for (i = 0; i < 2; i++)
4087             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4088     }
4089
4090     if (vp9_state->hme_supported) {
4091         gen9_vp9_scaling_kernel(ctx, encode_state,
4092                                 encoder_context,
4093                                 0);
4094         if (vp9_state->b16xme_supported) {
4095             gen9_vp9_scaling_kernel(ctx, encode_state,
4096                                     encoder_context,
4097                                     1);
4098         }
4099     }
4100
4101     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4102         if (vp9_state->b16xme_enabled)
4103             gen9_vp9_me_kernel(ctx, encode_state,
4104                                encoder_context,
4105                                1);
4106
4107         gen9_vp9_me_kernel(ctx, encode_state,
4108                            encoder_context,
4109                            0);
4110     }
4111
4112     if (vp9_state->brc_enabled) {
4113         if (vp9_state->mbenc_keyframe_dist_enabled)
4114             gen9_vp9_brc_intra_dist_kernel(ctx,
4115                                            encode_state,
4116                                            encoder_context);
4117
4118         gen9_vp9_brc_update_kernel(ctx, encode_state,
4119                                    encoder_context);
4120     }
4121
4122     if (vp9_state->picture_coding_type == KEY_FRAME) {
4123         gen9_vp9_mbenc_kernel(ctx, encode_state,
4124                               encoder_context,
4125                               VP9_MEDIA_STATE_MBENC_I_32x32);
4126         gen9_vp9_mbenc_kernel(ctx, encode_state,
4127                               encoder_context,
4128                               VP9_MEDIA_STATE_MBENC_I_16x16);
4129     } else {
4130         gen9_vp9_mbenc_kernel(ctx, encode_state,
4131                               encoder_context,
4132                               VP9_MEDIA_STATE_MBENC_P);
4133     }
4134
4135     gen9_vp9_mbenc_kernel(ctx, encode_state,
4136                           encoder_context,
4137                           VP9_MEDIA_STATE_MBENC_TX);
4138
4139     vp9_state->curr_mode_decision_index ^= 1;
4140     if (vp9_state->brc_enabled) {
4141         vp9_state->brc_inited = 1;
4142         vp9_state->brc_reset = 0;
4143     }
4144
4145     return VA_STATUS_SUCCESS;
4146 }
4147
4148 static VAStatus
4149 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4150                       VAProfile profile,
4151                       struct encode_state *encode_state,
4152                       struct intel_encoder_context *encoder_context)
4153 {
4154     VAStatus va_status;
4155     struct gen9_vp9_state *vp9_state;
4156
4157     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4158
4159     if (!vp9_state)
4160         return VA_STATUS_ERROR_INVALID_CONTEXT;
4161
4162     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4163     if (va_status != VA_STATUS_SUCCESS)
4164         return va_status;
4165
4166     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4167                                             encoder_context,
4168                                             !vp9_state->brc_allocated);
4169
4170     if (va_status != VA_STATUS_SUCCESS)
4171         return va_status;
4172     vp9_state->brc_allocated = 1;
4173
4174     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4175
4176     if (va_status != VA_STATUS_SUCCESS)
4177         return va_status;
4178
4179     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4180     if (va_status != VA_STATUS_SUCCESS)
4181         return va_status;
4182
4183     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4184     if (va_status != VA_STATUS_SUCCESS)
4185         return va_status;
4186
4187     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4188
4189     return VA_STATUS_SUCCESS;
4190 }
4191
4192 static void
4193 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4194 {
4195     int i;
4196
4197     for (i = 0; i < NUM_VP9_BRC; i++)
4198         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4199 }
4200
4201 static void
4202 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4203 {
4204     int i;
4205
4206     for (i = 0; i < NUM_VP9_SCALING; i++)
4207         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4208 }
4209
4210 static void
4211 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4212 {
4213     gen8_gpe_context_destroy(&me_context->gpe_context);
4214 }
4215
4216 static void
4217 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4218 {
4219     int i;
4220
4221     for (i = 0; i < NUM_VP9_MBENC; i++)
4222         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4223     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4224     mbenc_context->mbenc_bo_size = 0;
4225 }
4226
4227 static void
4228 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4229 {
4230     gen8_gpe_context_destroy(&dys_context->gpe_context);
4231 }
4232
4233 static void
4234 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4235 {
4236     gen9_vp9_free_resources(vme_context);
4237     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4238     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4239     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4240     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4241     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4242
4243     return;
4244 }
4245
/*
 * Destroy callback for the VP9 VME context: tear down the kernel contexts
 * and free the context structure itself. A NULL context is ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vp9_ctx = context;

    if (vp9_ctx) {
        gen9_vme_kernel_context_destroy_vp9(vp9_ctx);
        free(vp9_ctx);
    }
}
4260
4261 static void
4262 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4263                                   struct gen9_encoder_context_vp9 *vme_context,
4264                                   struct vp9_scaling_context *scaling_context)
4265 {
4266     struct i965_gpe_context *gpe_context = NULL;
4267     struct vp9_encoder_kernel_parameter kernel_param;
4268     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4269     struct i965_kernel scale_kernel;
4270
4271     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4272     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4273     kernel_param.sampler_size = 0;
4274
4275     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4276     scoreboard_param.mask = 0xFF;
4277     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4278     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4279     scoreboard_param.walkpat_flag = 0;
4280
4281     gpe_context = &scaling_context->gpe_contexts[0];
4282     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4283     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4284
4285     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4286     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4287     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4288         VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4289
4290     memset(&scale_kernel, 0, sizeof(scale_kernel));
4291
4292     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4293                                          sizeof(media_vp9_kernels),
4294                                          INTEL_VP9_ENC_SCALING4X,
4295                                          0,
4296                                          &scale_kernel);
4297
4298     gen8_gpe_load_kernels(ctx,
4299                           gpe_context,
4300                           &scale_kernel,
4301                           1);
4302
4303     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4304     kernel_param.inline_data_size = 0;
4305     kernel_param.sampler_size = 0;
4306
4307     gpe_context = &scaling_context->gpe_contexts[1];
4308     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4309     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4310
4311     memset(&scale_kernel, 0, sizeof(scale_kernel));
4312
4313     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4314                                          sizeof(media_vp9_kernels),
4315                                          INTEL_VP9_ENC_SCALING2X,
4316                                          0,
4317                                          &scale_kernel);
4318
4319     gen8_gpe_load_kernels(ctx,
4320                           gpe_context,
4321                           &scale_kernel,
4322                           1);
4323
4324     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4325     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4326     return;
4327 }
4328
4329 static void
4330 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4331                              struct gen9_encoder_context_vp9 *vme_context,
4332                              struct vp9_me_context *me_context)
4333 {
4334     struct i965_gpe_context *gpe_context = NULL;
4335     struct vp9_encoder_kernel_parameter kernel_param;
4336     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4337     struct i965_kernel scale_kernel;
4338
4339     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4340     kernel_param.inline_data_size = 0;
4341     kernel_param.sampler_size = 0;
4342
4343     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4344     scoreboard_param.mask = 0xFF;
4345     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4346     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4347     scoreboard_param.walkpat_flag = 0;
4348
4349     gpe_context = &me_context->gpe_context;
4350     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4351     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4352
4353     memset(&scale_kernel, 0, sizeof(scale_kernel));
4354
4355     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4356                                          sizeof(media_vp9_kernels),
4357                                          INTEL_VP9_ENC_ME,
4358                                          0,
4359                                          &scale_kernel);
4360
4361     gen8_gpe_load_kernels(ctx,
4362                           gpe_context,
4363                           &scale_kernel,
4364                           1);
4365
4366     return;
4367 }
4368
4369 static void
4370 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4371                                 struct gen9_encoder_context_vp9 *vme_context,
4372                                 struct vp9_mbenc_context *mbenc_context)
4373 {
4374     struct i965_gpe_context *gpe_context = NULL;
4375     struct vp9_encoder_kernel_parameter kernel_param;
4376     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4377     int i;
4378     struct i965_kernel scale_kernel;
4379
4380     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4381     kernel_param.inline_data_size = 0;
4382     kernel_param.sampler_size = 0;
4383
4384     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4385     scoreboard_param.mask = 0xFF;
4386     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4387     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4388
4389     for (i = 0; i < NUM_VP9_MBENC; i++) {
4390         gpe_context = &mbenc_context->gpe_contexts[i];
4391
4392         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4393             (i == VP9_MBENC_IDX_INTER)) {
4394             scoreboard_param.walkpat_flag = 1;
4395         } else
4396             scoreboard_param.walkpat_flag = 0;
4397
4398         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4399         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4400
4401         memset(&scale_kernel, 0, sizeof(scale_kernel));
4402
4403         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4404                                              sizeof(media_vp9_kernels),
4405                                              INTEL_VP9_ENC_MBENC,
4406                                              i,
4407                                              &scale_kernel);
4408
4409         gen8_gpe_load_kernels(ctx,
4410                               gpe_context,
4411                               &scale_kernel,
4412                               1);
4413     }
4414 }
4415
4416 static void
4417 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4418                               struct gen9_encoder_context_vp9 *vme_context,
4419                               struct vp9_brc_context *brc_context)
4420 {
4421     struct i965_gpe_context *gpe_context = NULL;
4422     struct vp9_encoder_kernel_parameter kernel_param;
4423     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4424     int i;
4425     struct i965_kernel scale_kernel;
4426
4427     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4428     kernel_param.inline_data_size = 0;
4429     kernel_param.sampler_size = 0;
4430
4431     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4432     scoreboard_param.mask = 0xFF;
4433     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4434     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4435
4436     for (i = 0; i < NUM_VP9_BRC; i++) {
4437         gpe_context = &brc_context->gpe_contexts[i];
4438         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4439         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4440
4441         memset(&scale_kernel, 0, sizeof(scale_kernel));
4442
4443         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4444                                              sizeof(media_vp9_kernels),
4445                                              INTEL_VP9_ENC_BRC,
4446                                              i,
4447                                              &scale_kernel);
4448
4449         gen8_gpe_load_kernels(ctx,
4450                               gpe_context,
4451                               &scale_kernel,
4452                               1);
4453     }
4454 }
4455
4456 static void
4457 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4458                               struct gen9_encoder_context_vp9 *vme_context,
4459                               struct vp9_dys_context *dys_context)
4460 {
4461     struct i965_gpe_context *gpe_context = NULL;
4462     struct vp9_encoder_kernel_parameter kernel_param;
4463     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4464     struct i965_kernel scale_kernel;
4465
4466     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4467     kernel_param.inline_data_size = 0;
4468     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4469
4470     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4471     scoreboard_param.mask = 0xFF;
4472     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4473     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4474     scoreboard_param.walkpat_flag = 0;
4475
4476     gpe_context = &dys_context->gpe_context;
4477     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4478     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4479
4480     memset(&scale_kernel, 0, sizeof(scale_kernel));
4481
4482     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4483                                          sizeof(media_vp9_kernels),
4484                                          INTEL_VP9_ENC_DYS,
4485                                          0,
4486                                          &scale_kernel);
4487
4488     gen8_gpe_load_kernels(ctx,
4489                           gpe_context,
4490                           &scale_kernel,
4491                           1);
4492
4493     return;
4494 }
4495
4496 static Bool
4497 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4498                                   struct intel_encoder_context *encoder_context,
4499                                   struct gen9_encoder_context_vp9 *vme_context)
4500 {
4501     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4502     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4503     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4504     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4505     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4506
4507     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4508     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4509     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4510     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4511
4512     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4513
4514     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4515     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4516     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4517     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4518     return true;
4519 }
4520
4521 static
4522 void gen9_vp9_write_compressed_element(char *buffer,
4523                                        int index,
4524                                        int prob,
4525                                        bool value)
4526 {
4527     struct vp9_compressed_element *base_element, *vp9_element;
4528     base_element = (struct vp9_compressed_element *)buffer;
4529
4530     vp9_element = base_element + (index >> 1);
4531     if (index % 2) {
4532         vp9_element->b_valid = 1;
4533         vp9_element->b_probdiff_select = 1;
4534         vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4535         vp9_element->b_bin = value;
4536     } else {
4537         vp9_element->a_valid = 1;
4538         vp9_element->a_probdiff_select = 1;
4539         vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4540         vp9_element->a_bin = value;
4541     }
4542 }
4543
4544 static void
4545 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4546                                             struct intel_encoder_context *encoder_context)
4547 {
4548     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4549     VAEncPictureParameterBufferVP9 *pic_param;
4550     struct gen9_vp9_state *vp9_state;
4551     char *buffer;
4552     int i;
4553
4554     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4555
4556     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4557         return;
4558
4559     pic_param = vp9_state->pic_param;
4560     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4561         (pic_param->pic_flags.bits.intra_only) ||
4562         pic_param->pic_flags.bits.error_resilient_mode) {
4563         /* reset current frame_context */
4564         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4565         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4566             pic_param->pic_flags.bits.error_resilient_mode ||
4567             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4568             for (i = 0; i < 4; i++)
4569                 memcpy(&vp9_state->vp9_frame_ctx[i],
4570                        &vp9_state->vp9_current_fc,
4571                        sizeof(FRAME_CONTEXT));
4572         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4573             i = pic_param->pic_flags.bits.frame_context_idx;
4574             memcpy(&vp9_state->vp9_frame_ctx[i],
4575                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4576         }
4577         /* reset the frame_ctx_idx = 0 */
4578         vp9_state->frame_ctx_idx = 0;
4579     } else {
4580         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4581     }
4582
4583     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4584     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4585
4586     if (!buffer)
4587         return;
4588
4589     /* write tx_size */
4590     if ((pic_param->luma_ac_qindex == 0) &&
4591         (pic_param->luma_dc_qindex_delta == 0) &&
4592         (pic_param->chroma_ac_qindex_delta == 0) &&
4593         (pic_param->chroma_dc_qindex_delta == 0)) {
4594         /* lossless flag */
4595         /* nothing is needed */
4596         gen9_vp9_write_compressed_element(buffer,
4597                                           0, 128, 0);
4598         gen9_vp9_write_compressed_element(buffer,
4599                                           1, 128, 0);
4600         gen9_vp9_write_compressed_element(buffer,
4601                                           2, 128, 0);
4602     } else {
4603         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4604             gen9_vp9_write_compressed_element(buffer,
4605                                               0, 128, 1);
4606             gen9_vp9_write_compressed_element(buffer,
4607                                               1, 128, 1);
4608             gen9_vp9_write_compressed_element(buffer,
4609                                               2, 128, 1);
4610         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4611             gen9_vp9_write_compressed_element(buffer,
4612                                               0, 128, 1);
4613             gen9_vp9_write_compressed_element(buffer,
4614                                               1, 128, 1);
4615             gen9_vp9_write_compressed_element(buffer,
4616                                               2, 128, 0);
4617         } else {
4618             unsigned int tx_mode;
4619
4620             tx_mode = vp9_state->tx_mode;
4621             gen9_vp9_write_compressed_element(buffer,
4622                                               0, 128, ((tx_mode) & 2));
4623             gen9_vp9_write_compressed_element(buffer,
4624                                               1, 128, ((tx_mode) & 1));
4625             gen9_vp9_write_compressed_element(buffer,
4626                                               2, 128, 0);
4627         }
4628
4629         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4630
4631             gen9_vp9_write_compressed_element(buffer,
4632                                               3, 128, 0);
4633
4634             gen9_vp9_write_compressed_element(buffer,
4635                                               7, 128, 0);
4636
4637             gen9_vp9_write_compressed_element(buffer,
4638                                               15, 128, 0);
4639         }
4640     }
4641     /*Setup all the input&output object*/
4642
4643     {
4644         /* update the coeff_update flag */
4645         gen9_vp9_write_compressed_element(buffer,
4646                                           27, 128, 0);
4647         gen9_vp9_write_compressed_element(buffer,
4648                                           820, 128, 0);
4649         gen9_vp9_write_compressed_element(buffer,
4650                                           1613, 128, 0);
4651         gen9_vp9_write_compressed_element(buffer,
4652                                           2406, 128, 0);
4653     }
4654
4655
4656     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4657         bool allow_comp = !(
4658                               (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4659                               (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4660                           );
4661
4662         if (allow_comp) {
4663             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4664                 gen9_vp9_write_compressed_element(buffer,
4665                                                   3271, 128, 1);
4666                 gen9_vp9_write_compressed_element(buffer,
4667                                                   3272, 128, 1);
4668             } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4669                 gen9_vp9_write_compressed_element(buffer,
4670                                                   3271, 128, 1);
4671                 gen9_vp9_write_compressed_element(buffer,
4672                                                   3272, 128, 0);
4673             } else {
4674
4675                 gen9_vp9_write_compressed_element(buffer,
4676                                                   3271, 128, 0);
4677                 gen9_vp9_write_compressed_element(buffer,
4678                                                   3272, 128, 0);
4679             }
4680         }
4681     }
4682
4683     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4684 }
4685
4686
4687 static void
4688 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4689                               struct encode_state *encode_state,
4690                               struct intel_encoder_context *encoder_context,
4691                               struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4692 {
4693     struct intel_batchbuffer *batch = encoder_context->base.batch;
4694
4695     BEGIN_BCS_BATCH(batch, 6);
4696
4697     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4698     OUT_BCS_BATCH(batch,
4699                   (pipe_mode_param->stream_out << 12) |
4700                   (pipe_mode_param->codec_mode << 5) |
4701                   (0 << 3) | /* disable Pic Status / Error Report */
4702                   (pipe_mode_param->stream_out << 2) |
4703                   HCP_CODEC_SELECT_ENCODE);
4704     OUT_BCS_BATCH(batch, 0);
4705     OUT_BCS_BATCH(batch, 0);
4706     OUT_BCS_BATCH(batch, (1 << 6));
4707     OUT_BCS_BATCH(batch, 0);
4708
4709     ADVANCE_BCS_BATCH(batch);
4710 }
4711
4712 static void
4713 gen9_vp9_add_surface_state(VADriverContextP ctx,
4714                            struct encode_state *encode_state,
4715                            struct intel_encoder_context *encoder_context,
4716                            hcp_surface_state *hcp_state)
4717 {
4718     struct intel_batchbuffer *batch = encoder_context->base.batch;
4719     if (!hcp_state)
4720         return;
4721
4722     BEGIN_BCS_BATCH(batch, 3);
4723     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4724     OUT_BCS_BATCH(batch,
4725                   (hcp_state->dw1.surface_id << 28) |
4726                   (hcp_state->dw1.surface_pitch - 1)
4727                  );
4728     OUT_BCS_BATCH(batch,
4729                   (hcp_state->dw2.surface_format << 28) |
4730                   (hcp_state->dw2.y_cb_offset)
4731                  );
4732     ADVANCE_BCS_BATCH(batch);
4733 }
4734
4735 static void
4736 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4737                                  struct encode_state *encode_state,
4738                                  struct intel_encoder_context *encoder_context)
4739 {
4740     struct i965_driver_data *i965 = i965_driver_data(ctx);
4741     struct intel_batchbuffer *batch = encoder_context->base.batch;
4742     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4743     struct gen9_vp9_state *vp9_state;
4744     unsigned int i;
4745     struct object_surface *obj_surface;
4746
4747     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4748
4749     if (!vp9_state || !vp9_state->pic_param)
4750         return;
4751
4752
4753     BEGIN_BCS_BATCH(batch, 104);
4754
4755     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4756
4757     obj_surface = encode_state->reconstructed_object;
4758
4759     /* reconstructed obj_surface is already checked. So this is skipped */
4760     /* DW 1..3 decoded surface */
4761     OUT_RELOC64(batch,
4762                 obj_surface->bo,
4763                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4764                 0);
4765     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4766
4767     /* DW 4..6 deblocking line */
4768     OUT_RELOC64(batch,
4769                 pak_context->res_deblocking_filter_line_buffer.bo,
4770                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4771                 0);
4772     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4773
4774     /* DW 7..9 deblocking tile line */
4775     OUT_RELOC64(batch,
4776                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4777                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4778                 0);
4779     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4780
4781     /* DW 10..12 deblocking tile col */
4782     OUT_RELOC64(batch,
4783                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4784                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4785                 0);
4786     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4787
4788     /* DW 13..15 metadata line */
4789     OUT_RELOC64(batch,
4790                 pak_context->res_metadata_line_buffer.bo,
4791                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4792                 0);
4793     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4794
4795     /* DW 16..18 metadata tile line */
4796     OUT_RELOC64(batch,
4797                 pak_context->res_metadata_tile_line_buffer.bo,
4798                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4799                 0);
4800     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4801
4802     /* DW 19..21 metadata tile col */
4803     OUT_RELOC64(batch,
4804                 pak_context->res_metadata_tile_col_buffer.bo,
4805                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4806                 0);
4807     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4808
4809     /* DW 22..30 SAO is not used for VP9 */
4810     OUT_BCS_BATCH(batch, 0);
4811     OUT_BCS_BATCH(batch, 0);
4812     OUT_BCS_BATCH(batch, 0);
4813     OUT_BCS_BATCH(batch, 0);
4814     OUT_BCS_BATCH(batch, 0);
4815     OUT_BCS_BATCH(batch, 0);
4816     OUT_BCS_BATCH(batch, 0);
4817     OUT_BCS_BATCH(batch, 0);
4818     OUT_BCS_BATCH(batch, 0);
4819
4820     /* DW 31..33 Current Motion vector temporal buffer */
4821     OUT_RELOC64(batch,
4822                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4823                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4824                 0);
4825     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4826
4827     /* DW 34..36 Not used */
4828     OUT_BCS_BATCH(batch, 0);
4829     OUT_BCS_BATCH(batch, 0);
4830     OUT_BCS_BATCH(batch, 0);
4831
4832     /* Only the first three reference_frame is used for VP9 */
4833     /* DW 37..52 for reference_frame */
4834     i = 0;
4835     if (vp9_state->picture_coding_type) {
4836         for (i = 0; i < 3; i++) {
4837
4838             if (pak_context->reference_surfaces[i].bo) {
4839                 OUT_RELOC64(batch,
4840                             pak_context->reference_surfaces[i].bo,
4841                             I915_GEM_DOMAIN_INSTRUCTION, 0,
4842                             0);
4843             } else {
4844                 OUT_BCS_BATCH(batch, 0);
4845                 OUT_BCS_BATCH(batch, 0);
4846             }
4847         }
4848     }
4849
4850     for (; i < 8; i++) {
4851         OUT_BCS_BATCH(batch, 0);
4852         OUT_BCS_BATCH(batch, 0);
4853     }
4854
4855     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4856
4857     /* DW 54..56 for source input */
4858     OUT_RELOC64(batch,
4859                 pak_context->uncompressed_picture_source.bo,
4860                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4861                 0);
4862     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4863
4864     /* DW 57..59 StreamOut is not used */
4865     OUT_BCS_BATCH(batch, 0);
4866     OUT_BCS_BATCH(batch, 0);
4867     OUT_BCS_BATCH(batch, 0);
4868
4869     /* DW 60..62. Not used for encoder */
4870     OUT_BCS_BATCH(batch, 0);
4871     OUT_BCS_BATCH(batch, 0);
4872     OUT_BCS_BATCH(batch, 0);
4873
4874     /* DW 63..65. ILDB Not used for encoder */
4875     OUT_BCS_BATCH(batch, 0);
4876     OUT_BCS_BATCH(batch, 0);
4877     OUT_BCS_BATCH(batch, 0);
4878
4879     /* DW 66..81 For the collocated motion vector temporal buffer */
4880     if (vp9_state->picture_coding_type) {
4881         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4882         OUT_RELOC64(batch,
4883                     pak_context->res_mv_temporal_buffer[prev_index].bo,
4884                     I915_GEM_DOMAIN_INSTRUCTION, 0,
4885                     0);
4886     } else {
4887         OUT_BCS_BATCH(batch, 0);
4888         OUT_BCS_BATCH(batch, 0);
4889     }
4890
4891     for (i = 1; i < 8; i++) {
4892         OUT_BCS_BATCH(batch, 0);
4893         OUT_BCS_BATCH(batch, 0);
4894     }
4895     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4896
4897     /* DW 83..85 VP9 prob buffer */
4898     OUT_RELOC64(batch,
4899                 pak_context->res_prob_buffer.bo,
4900                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4901                 0);
4902
4903     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4904
4905     /* DW 86..88 Segment id buffer */
4906     if (pak_context->res_segmentid_buffer.bo) {
4907         OUT_RELOC64(batch,
4908                     pak_context->res_segmentid_buffer.bo,
4909                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4910                     0);
4911     } else {
4912         OUT_BCS_BATCH(batch, 0);
4913         OUT_BCS_BATCH(batch, 0);
4914     }
4915     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4916
4917     /* DW 89..91 HVD line rowstore buffer */
4918     OUT_RELOC64(batch,
4919                 pak_context->res_hvd_line_buffer.bo,
4920                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4921                 0);
4922     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4923
4924     /* DW 92..94 HVD tile line rowstore buffer */
4925     OUT_RELOC64(batch,
4926                 pak_context->res_hvd_tile_line_buffer.bo,
4927                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4928                 0);
4929     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4930
4931     /* DW 95..97 SAO streamout. Not used for VP9 */
4932     OUT_BCS_BATCH(batch, 0);
4933     OUT_BCS_BATCH(batch, 0);
4934     OUT_BCS_BATCH(batch, 0);
4935
4936     /* reserved for KBL. 98..100 */
4937     OUT_BCS_BATCH(batch, 0);
4938     OUT_BCS_BATCH(batch, 0);
4939     OUT_BCS_BATCH(batch, 0);
4940
4941     /* 101..103 */
4942     OUT_BCS_BATCH(batch, 0);
4943     OUT_BCS_BATCH(batch, 0);
4944     OUT_BCS_BATCH(batch, 0);
4945
4946     ADVANCE_BCS_BATCH(batch);
4947 }
4948
4949 static void
4950 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4951                                      struct encode_state *encode_state,
4952                                      struct intel_encoder_context *encoder_context)
4953 {
4954     struct i965_driver_data *i965 = i965_driver_data(ctx);
4955     struct intel_batchbuffer *batch = encoder_context->base.batch;
4956     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4957     struct gen9_vp9_state *vp9_state;
4958
4959     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4960
4961     /* to do */
4962     BEGIN_BCS_BATCH(batch, 29);
4963
4964     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4965
4966     /* indirect bitstream object base */
4967     OUT_BCS_BATCH(batch, 0);
4968     OUT_BCS_BATCH(batch, 0);
4969     OUT_BCS_BATCH(batch, 0);
4970     /* the upper bound of indirect bitstream object */
4971     OUT_BCS_BATCH(batch, 0);
4972     OUT_BCS_BATCH(batch, 0);
4973
4974     /* DW 6: Indirect CU object base address */
4975     OUT_RELOC64(batch,
4976                 pak_context->res_mb_code_surface.bo,
4977                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
4978                 vp9_state->mb_data_offset);
4979     /* default attribute */
4980     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4981
4982     /* DW 9..11, PAK-BSE */
4983     OUT_RELOC64(batch,
4984                 pak_context->indirect_pak_bse_object.bo,
4985                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4986                 pak_context->indirect_pak_bse_object.offset);
4987     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4988
4989     /* DW 12..13 upper bound */
4990     OUT_RELOC64(batch,
4991                 pak_context->indirect_pak_bse_object.bo,
4992                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4993                 pak_context->indirect_pak_bse_object.end_offset);
4994
4995     /* DW 14..16 compressed header buffer */
4996     OUT_RELOC64(batch,
4997                 pak_context->res_compressed_input_buffer.bo,
4998                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4999                 0);
5000     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5001
5002     /* DW 17..19 prob counter streamout */
5003     OUT_RELOC64(batch,
5004                 pak_context->res_prob_counter_buffer.bo,
5005                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5006                 0);
5007     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5008
5009     /* DW 20..22 prob delta streamin */
5010     OUT_RELOC64(batch,
5011                 pak_context->res_prob_delta_buffer.bo,
5012                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5013                 0);
5014     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5015
5016     /* DW 23..25 Tile record streamout */
5017     OUT_RELOC64(batch,
5018                 pak_context->res_tile_record_streamout_buffer.bo,
5019                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5020                 0);
5021     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5022
5023     /* DW 26..28 CU record streamout */
5024     OUT_RELOC64(batch,
5025                 pak_context->res_cu_stat_streamout_buffer.bo,
5026                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5027                 0);
5028     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5029
5030     ADVANCE_BCS_BATCH(batch);
5031 }
5032
5033 static void
5034 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5035                            struct encode_state *encode_state,
5036                            struct intel_encoder_context *encoder_context,
5037                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5038 {
5039     struct intel_batchbuffer *batch = encoder_context->base.batch;
5040     uint32_t batch_value, tmp;
5041     VAEncPictureParameterBufferVP9 *pic_param;
5042
5043     if (!encode_state->pic_param_ext ||
5044         !encode_state->pic_param_ext->buffer) {
5045         return;
5046     }
5047
5048     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5049
5050     batch_value = seg_param->seg_flags.bits.segment_reference;
5051     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5052         pic_param->pic_flags.bits.intra_only)
5053         batch_value = 0;
5054
5055     BEGIN_BCS_BATCH(batch, 8);
5056
5057     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5058     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5059     OUT_BCS_BATCH(batch,
5060                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5061                   (batch_value << 1) |
5062                   (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5063                  );
5064
5065     /* DW 3..6 is not used for encoder */
5066     OUT_BCS_BATCH(batch, 0);
5067     OUT_BCS_BATCH(batch, 0);
5068     OUT_BCS_BATCH(batch, 0);
5069     OUT_BCS_BATCH(batch, 0);
5070
5071     /* DW 7 Mode */
5072     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5073     batch_value = tmp;
5074     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5075     batch_value |= (tmp << 16);
5076     OUT_BCS_BATCH(batch, batch_value);
5077
5078     ADVANCE_BCS_BATCH(batch);
5079
5080 }
5081
5082 static void
5083 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5084                                                  struct intel_encoder_context *encoder_context,
5085                                                  struct i965_gpe_resource *obj_batch_buffer)
5086 {
5087     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5088     struct gen9_vp9_state *vp9_state;
5089     int uncompressed_header_length;
5090     unsigned int *cmd_ptr;
5091     unsigned int dw_length, bits_in_last_dw;
5092
5093     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5094
5095     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5096         return;
5097
5098     uncompressed_header_length = vp9_state->header_length;
5099     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5100
5101     if (!cmd_ptr)
5102         return;
5103
5104     bits_in_last_dw = uncompressed_header_length % 4;
5105     bits_in_last_dw *= 8;
5106
5107     if (bits_in_last_dw == 0)
5108         bits_in_last_dw = 32;
5109
5110     /* get the DWORD length of the inserted_data */
5111     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5112     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5113
5114     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5115                   (0 << 16) | /* the start offset in first DW */
5116                   (0 << 15) |
5117                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5118                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5119                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5120                   (1 << 2) |  /* last header flag. */
5121                   (0 << 1));
5122     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5123
5124     cmd_ptr += dw_length;
5125
5126     *cmd_ptr++ = MI_NOOP;
5127     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5128     i965_unmap_gpe_resource(obj_batch_buffer);
5129 }
5130
5131 static void
5132 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5133                            struct encode_state *encode_state,
5134                            struct intel_encoder_context *encoder_context)
5135 {
5136     struct intel_batchbuffer *batch = encoder_context->base.batch;
5137     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5138     struct object_surface *obj_surface;
5139     VAEncPictureParameterBufferVP9 *pic_param;
5140     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5141     struct gen9_vp9_state *vp9_state;
5142     struct gen9_surface_vp9 *vp9_priv_surface;
5143     int i;
5144     struct gen9_hcpe_pipe_mode_select_param mode_param;
5145     hcp_surface_state hcp_surface;
5146     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5147     int segment_count;
5148
5149     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5150
5151     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5152         return;
5153
5154     pic_param = vp9_state->pic_param;
5155     seg_param = vp9_state->segment_param;
5156
5157     if (vp9_state->curr_pak_pass == 0) {
5158         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5159                                                          &pak_context->res_pak_uncompressed_input_buffer);
5160
5161         // Check if driver already programmed pic state as part of BRC update kernel programming.
5162         if (!vp9_state->brc_enabled) {
5163             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5164                                                      encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5165         }
5166     }
5167
5168     if (vp9_state->curr_pak_pass == 0) {
5169         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5170     }
5171
5172     {
5173         /* copy the frame_context[frame_idx] into curr_frame_context */
5174         memcpy(&vp9_state->vp9_current_fc,
5175                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5176                sizeof(FRAME_CONTEXT));
5177         {
5178             uint8_t *prob_ptr;
5179
5180             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5181
5182             if (!prob_ptr)
5183                 return;
5184
5185             /* copy the current fc to vp9_prob buffer */
5186             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5187             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5188                 pic_param->pic_flags.bits.intra_only) {
5189                 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5190
5191                 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5192                        sizeof(vp9_kf_partition_probs));
5193                 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5194                        sizeof(vp9_kf_uv_mode_prob));
5195             }
5196             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5197         }
5198     }
5199
5200     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5201         /* read image status and insert the conditional end cmd */
5202         /* image ctrl/status is already accessed */
5203         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5204         struct vp9_encode_status_buffer_internal *status_buffer;
5205
5206         status_buffer = &vp9_state->status_buffer;
5207         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5208         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5209         mi_cond_end.bo = status_buffer->bo;
5210         mi_cond_end.compare_data = 0;
5211         mi_cond_end.compare_mask_mode_disabled = 1;
5212         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5213                                                  &mi_cond_end);
5214     }
5215
5216     mode_param.codec_mode = 1;
5217     mode_param.stream_out = 0;
5218     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5219
5220     /* reconstructed surface */
5221     memset(&hcp_surface, 0, sizeof(hcp_surface));
5222     obj_surface = encode_state->reconstructed_object;
5223     hcp_surface.dw1.surface_id = 0;
5224     hcp_surface.dw1.surface_pitch = obj_surface->width;
5225     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5226     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5227     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5228                                &hcp_surface);
5229
5230     /* Input surface */
5231     if (vp9_state->dys_in_use &&
5232         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5233          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5234         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5235         obj_surface = vp9_priv_surface->dys_surface_obj;
5236     } else {
5237         obj_surface = encode_state->input_yuv_object;
5238     }
5239
5240     hcp_surface.dw1.surface_id = 1;
5241     hcp_surface.dw1.surface_pitch = obj_surface->width;
5242     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5243     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5244     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5245                                &hcp_surface);
5246
5247     if (vp9_state->picture_coding_type) {
5248         /* Add surface for last */
5249         if (vp9_state->last_ref_obj) {
5250             obj_surface = vp9_state->last_ref_obj;
5251             hcp_surface.dw1.surface_id = 2;
5252             hcp_surface.dw1.surface_pitch = obj_surface->width;
5253             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5254             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5255             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5256                                        &hcp_surface);
5257         }
5258         if (vp9_state->golden_ref_obj) {
5259             obj_surface = vp9_state->golden_ref_obj;
5260             hcp_surface.dw1.surface_id = 3;
5261             hcp_surface.dw1.surface_pitch = obj_surface->width;
5262             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5263             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5264             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5265                                        &hcp_surface);
5266         }
5267         if (vp9_state->alt_ref_obj) {
5268             obj_surface = vp9_state->alt_ref_obj;
5269             hcp_surface.dw1.surface_id = 4;
5270             hcp_surface.dw1.surface_pitch = obj_surface->width;
5271             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5272             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5273             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5274                                        &hcp_surface);
5275         }
5276     }
5277
5278     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5279
5280     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5281
5282     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5283     memset(&second_level_batch, 0, sizeof(second_level_batch));
5284
5285     if (vp9_state->curr_pak_pass == 0) {
5286         second_level_batch.offset = 0;
5287     } else
5288         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5289
5290     second_level_batch.is_second_level = 1;
5291     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5292
5293     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5294
5295     if (pic_param->pic_flags.bits.segmentation_enabled &&
5296         seg_param)
5297         segment_count = 8;
5298     else {
5299         segment_count = 1;
5300         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5301         seg_param = &tmp_seg_param;
5302     }
5303     for (i = 0; i < segment_count; i++) {
5304         gen9_pak_vp9_segment_state(ctx, encode_state,
5305                                    encoder_context,
5306                                    &seg_param->seg_data[i], i);
5307     }
5308
5309     /* Insert the uncompressed header buffer */
5310     second_level_batch.is_second_level = 1;
5311     second_level_batch.offset = 0;
5312     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5313
5314     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5315
5316     /* PAK_OBJECT */
5317     second_level_batch.is_second_level = 1;
5318     second_level_batch.offset = 0;
5319     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5320     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5321
5322     return;
5323 }
5324
5325 static void
5326 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5327 {
5328     struct intel_batchbuffer *batch = encoder_context->base.batch;
5329     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5330     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5331     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5332     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5333     struct vp9_encode_status_buffer_internal *status_buffer;
5334     struct gen9_vp9_state *vp9_state;
5335
5336     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5337     if (!vp9_state || !pak_context || !batch)
5338         return;
5339
5340     status_buffer = &(vp9_state->status_buffer);
5341
5342     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5343     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5344
5345     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5346     mi_store_reg_mem_param.bo = status_buffer->bo;
5347     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5348     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5349     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5350
5351     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5352     mi_store_reg_mem_param.offset = 0;
5353     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5354     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5355
5356     /* Read HCP Image status */
5357     mi_store_reg_mem_param.bo = status_buffer->bo;
5358     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5359     mi_store_reg_mem_param.mmio_offset =
5360         status_buffer->vp9_image_mask_reg_offset;
5361     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5362
5363     mi_store_reg_mem_param.bo = status_buffer->bo;
5364     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5365     mi_store_reg_mem_param.mmio_offset =
5366         status_buffer->vp9_image_ctrl_reg_offset;
5367     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5368
5369     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5370     mi_store_reg_mem_param.offset = 4;
5371     mi_store_reg_mem_param.mmio_offset =
5372         status_buffer->vp9_image_ctrl_reg_offset;
5373     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5374
5375     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5376
5377     return;
5378 }
5379
5380 static VAStatus
5381 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5382                               struct encode_state *encode_state,
5383                               struct intel_encoder_context *encoder_context)
5384 {
5385     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5386     struct object_surface *obj_surface;
5387     struct object_buffer *obj_buffer;
5388     struct i965_coded_buffer_segment *coded_buffer_segment;
5389     VAEncPictureParameterBufferVP9 *pic_param;
5390     struct gen9_vp9_state *vp9_state;
5391     dri_bo *bo;
5392     int i;
5393
5394     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5395     if (!vp9_state ||
5396         !vp9_state->pic_param)
5397         return VA_STATUS_ERROR_INVALID_PARAMETER;
5398
5399     pic_param = vp9_state->pic_param;
5400
5401     /* reconstructed surface */
5402     obj_surface = encode_state->reconstructed_object;
5403     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5404
5405     dri_bo_unreference(pak_context->reconstructed_object.bo);
5406
5407     pak_context->reconstructed_object.bo = obj_surface->bo;
5408     dri_bo_reference(pak_context->reconstructed_object.bo);
5409
5410     /* set vp9 reference frames */
5411     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5412         if (pak_context->reference_surfaces[i].bo)
5413             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5414         pak_context->reference_surfaces[i].bo = NULL;
5415     }
5416
5417     /* Three reference frames are enough for VP9 */
5418     if (pic_param->pic_flags.bits.frame_type &&
5419         !pic_param->pic_flags.bits.intra_only) {
5420         for (i = 0; i < 3; i++) {
5421             obj_surface = encode_state->reference_objects[i];
5422             if (obj_surface && obj_surface->bo) {
5423                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5424                 dri_bo_reference(obj_surface->bo);
5425             }
5426         }
5427     }
5428
5429     /* input YUV surface */
5430     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5431     pak_context->uncompressed_picture_source.bo = NULL;
5432     obj_surface = encode_state->reconstructed_object;
5433     if (vp9_state->dys_in_use &&
5434         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5435          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5436         struct gen9_surface_vp9 *vp9_priv_surface =
5437             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5438         obj_surface = vp9_priv_surface->dys_surface_obj;
5439     } else
5440         obj_surface = encode_state->input_yuv_object;
5441
5442     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5443     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5444
5445     /* coded buffer */
5446     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5447     pak_context->indirect_pak_bse_object.bo = NULL;
5448     obj_buffer = encode_state->coded_buf_object;
5449     bo = obj_buffer->buffer_store->bo;
5450     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5451     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5452     pak_context->indirect_pak_bse_object.bo = bo;
5453     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5454
5455     /* set the internal flag to 0 to indicate the coded size is unknown */
5456     dri_bo_map(bo, 1);
5457     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5458     coded_buffer_segment->mapped = 0;
5459     coded_buffer_segment->codec = encoder_context->codec;
5460     coded_buffer_segment->status_support = 1;
5461     dri_bo_unmap(bo);
5462
5463     return VA_STATUS_SUCCESS;
5464 }
5465
/*
 * BRC preparation hook for the VP9 PAK stage.
 *
 * Currently a no-op: neither argument is read or modified.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    (void)encode_state;
    (void)encoder_context;
}
5471
5472 static void
5473 gen9_vp9_pak_context_destroy(void *context)
5474 {
5475     struct gen9_encoder_context_vp9 *pak_context = context;
5476     int i;
5477
5478     dri_bo_unreference(pak_context->reconstructed_object.bo);
5479     pak_context->reconstructed_object.bo = NULL;
5480
5481     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5482     pak_context->uncompressed_picture_source.bo = NULL;
5483
5484     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5485     pak_context->indirect_pak_bse_object.bo = NULL;
5486
5487     for (i = 0; i < 8; i++) {
5488         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5489         pak_context->reference_surfaces[i].bo = NULL;
5490     }
5491
5492     /* vme & pak same the same structure, so don't free the context here */
5493 }
5494
5495 static VAStatus
5496 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5497                       VAProfile profile,
5498                       struct encode_state *encode_state,
5499                       struct intel_encoder_context *encoder_context)
5500 {
5501     struct i965_driver_data *i965 = i965_driver_data(ctx);
5502     struct intel_batchbuffer *batch = encoder_context->base.batch;
5503     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5504     VAStatus va_status;
5505     struct gen9_vp9_state *vp9_state;
5506     VAEncPictureParameterBufferVP9 *pic_param;
5507     int i;
5508
5509     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5510
5511     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5512         return VA_STATUS_ERROR_INVALID_PARAMETER;
5513
5514     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5515
5516     if (va_status != VA_STATUS_SUCCESS)
5517         return va_status;
5518
5519     if (i965->intel.has_bsd2)
5520         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5521     else
5522         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5523
5524     intel_batchbuffer_emit_mi_flush(batch);
5525
5526     BEGIN_BCS_BATCH(batch, 64);
5527     for (i = 0; i < 64; i++)
5528         OUT_BCS_BATCH(batch, MI_NOOP);
5529
5530     ADVANCE_BCS_BATCH(batch);
5531
5532     for (vp9_state->curr_pak_pass = 0;
5533          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5534          vp9_state->curr_pak_pass++) {
5535
5536         if (vp9_state->curr_pak_pass == 0) {
5537             /* Initialize the VP9 Image Ctrl reg for the first pass */
5538             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5539             struct vp9_encode_status_buffer_internal *status_buffer;
5540
5541             status_buffer = &(vp9_state->status_buffer);
5542             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5543             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5544             mi_load_reg_imm.data = 0;
5545             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5546         }
5547         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5548         gen9_vp9_read_mfc_status(ctx, encoder_context);
5549     }
5550
5551     intel_batchbuffer_end_atomic(batch);
5552     intel_batchbuffer_flush(batch);
5553
5554     pic_param = vp9_state->pic_param;
5555     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5556     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5557     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5558     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5559     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5560     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5561     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5562     vp9_state->frame_number++;
5563     vp9_state->curr_mv_temporal_index ^= 1;
5564     vp9_state->first_frame = 0;
5565
5566     return VA_STATUS_SUCCESS;
5567 }
5568
5569 Bool
5570 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5571 {
5572     struct gen9_encoder_context_vp9 *vme_context = NULL;
5573     struct gen9_vp9_state *vp9_state = NULL;
5574
5575     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5576     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5577
5578     if (!vme_context || !vp9_state) {
5579         if (vme_context)
5580             free(vme_context);
5581         if (vp9_state)
5582             free(vp9_state);
5583         return false;
5584     }
5585
5586     encoder_context->enc_priv_state = vp9_state;
5587     vme_context->enc_priv_state = vp9_state;
5588
5589     /* Initialize the features that are supported by VP9 */
5590     vme_context->hme_supported = 1;
5591     vme_context->use_hw_scoreboard = 1;
5592     vme_context->use_hw_non_stalling_scoreboard = 1;
5593
5594     vp9_state->tx_mode = TX_MODE_SELECT;
5595     vp9_state->multi_ref_qp_check = 0;
5596     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5597     vp9_state->num_pak_passes = 1;
5598     vp9_state->hme_supported = vme_context->hme_supported;
5599     vp9_state->b16xme_supported = 1;
5600
5601     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5602         encoder_context->rate_control_mode != VA_RC_CQP) {
5603         vp9_state->brc_enabled = 1;
5604         vp9_state->brc_distortion_buffer_supported = 1;
5605         vp9_state->brc_constant_buffer_supported = 1;
5606         vp9_state->num_pak_passes = 4;
5607     }
5608     vp9_state->dys_enabled = 1; /* this is supported by default */
5609     vp9_state->first_frame = 1;
5610
5611     /* the definition of status buffer offset for VP9 */
5612     {
5613         struct vp9_encode_status_buffer_internal *status_buffer;
5614         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5615
5616         status_buffer = &vp9_state->status_buffer;
5617         memset(status_buffer, 0,
5618                sizeof(struct vp9_encode_status_buffer_internal));
5619
5620         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5621         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5622         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5623         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5624
5625         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5626         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5627         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5628     }
5629
5630     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5631
5632     encoder_context->vme_context = vme_context;
5633     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5634     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5635
5636     return true;
5637 }
5638
5639 static VAStatus
5640 gen9_vp9_get_coded_status(VADriverContextP ctx,
5641                           struct intel_encoder_context *encoder_context,
5642                           struct i965_coded_buffer_segment *coded_buf_seg)
5643 {
5644     struct vp9_encode_status *vp9_encode_status;
5645
5646     if (!encoder_context || !coded_buf_seg)
5647         return VA_STATUS_ERROR_INVALID_BUFFER;
5648
5649     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5650     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5651
5652     /* One VACodedBufferSegment for VP9 will be added later.
5653      * It will be linked to the next element of coded_buf_seg->base.next
5654      */
5655
5656     return VA_STATUS_SUCCESS;
5657 }
5658
5659 Bool
5660 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5661 {
5662     /* VME & PAK share the same context */
5663     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5664
5665     if (!pak_context)
5666         return false;
5667
5668     encoder_context->mfc_context = pak_context;
5669     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5670     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5671     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5672     encoder_context->get_status = gen9_vp9_get_coded_status;
5673     return true;
5674 }