OSDN Git Service

05d86daec99b65bf4721046be29753adc0753e96
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
/* General limits used by the VP9 encoder code in this file. */
#define MAX_VP9_ENCODER_SURFACES        64
#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1

/* Default MOCS value; the pic-state buffers are allocated as four
 * chunks of VP9_PIC_STATE_BUFFER_SIZE bytes each.
 */
#define DEFAULT_MOCS                    0x02
#define VP9_PIC_STATE_BUFFER_SIZE       192

/* NOTE(review): presumably the kbps -> bps factor; confirm at the use sites. */
#define VP9_BRC_KBPS                    1000

/* BRC kernel mode selectors; every mode owns a distinct single bit. */
#define BRC_KERNEL_CBR                  0x0010
#define BRC_KERNEL_VBR                  0x0020
#define BRC_KERNEL_AVBR                 0x0040
#define BRC_KERNEL_CQL                  0x0080
63
/*
 * One 32-bit entry of the kernel header table found at the start of the
 * kernel binary.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved             : 6;
    /* The lookup code shifts this value left by 6 to obtain the byte
     * offset of the kernel inside the binary.
     */
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
69
70 typedef struct _intel_vp9_kernel_header {
71     int nKernelCount;
72     intel_kernel_header PLY_DSCALE;
73     intel_kernel_header VP9_ME_P;
74     intel_kernel_header VP9_Enc_I_32x32;
75     intel_kernel_header VP9_Enc_I_16x16;
76     intel_kernel_header VP9_Enc_P;
77     intel_kernel_header VP9_Enc_TX;
78     intel_kernel_header VP9_DYS;
79
80     intel_kernel_header VP9BRC_Intra_Distortion;
81     intel_kernel_header VP9BRC_Init;
82     intel_kernel_header VP9BRC_Reset;
83     intel_kernel_header VP9BRC_Update;
84 } intel_vp9_kernel_header;
85
/* Independent single-bit flags, one per DYS scaling level (1x, 4x, 16x). */
#define DYS_1X_FLAG     0x01
#define DYS_4X_FLAG     0x02
#define DYS_16X_FLAG    0x04
89
/*
 * Frame dimensions handed to the surface-checking helpers, which size the
 * per-surface scaled/DYS surfaces from them.
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* requested frame width */
    uint32_t frame_height;  /* requested frame height */
};
94
/*
 * Convert a two's-complement integer to a sign-magnitude bit pattern.
 *
 * @val:           value to convert.
 * @sign_bit_pos:  total width of the result in bits (1..32). The sign is
 *                 stored in bit (sign_bit_pos - 1); the magnitude occupies
 *                 the bits below it and is truncated to fit.
 *
 * Returns the encoded value, or 0 when sign_bit_pos is outside 1..32.
 *
 * All arithmetic is done on unsigned values so that a 32-bit wide result
 * and val == INT_MIN do not trigger signed-overflow / shift undefined
 * behaviour.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t sign_bit, magnitude;

    if (sign_bit_pos < 1 || sign_bit_pos > 32)
        return 0;

    sign_bit = (uint32_t)1 << (sign_bit_pos - 1);

    /* 0u - (uint32_t)val is |val| for negative val, including INT_MIN */
    magnitude = (val < 0) ? (0u - (uint32_t)val) : (uint32_t)val;
    magnitude &= sign_bit - 1;

    return (val < 0) ? (sign_bit | magnitude) : magnitude;
}
109
110 static bool
111 intel_vp9_get_kernel_header_and_size(
112     void                             *pvbinary,
113     int                              binary_size,
114     INTEL_VP9_ENC_OPERATION          operation,
115     int                              krnstate_idx,
116     struct i965_kernel               *ret_kernel)
117 {
118     typedef uint32_t BIN_PTR[4];
119
120     char *bin_start;
121     intel_vp9_kernel_header      *pkh_table;
122     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
123     int next_krnoffset;
124
125     if (!pvbinary || !ret_kernel)
126         return false;
127
128     bin_start = (char *)pvbinary;
129     pkh_table = (intel_vp9_kernel_header *)pvbinary;
130     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
131     next_krnoffset = binary_size;
132
133     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X))
134     {
135         pcurr_header = &pkh_table->PLY_DSCALE;
136     }
137     else if (operation == INTEL_VP9_ENC_ME)
138     {
139         pcurr_header = &pkh_table->VP9_ME_P;
140     }
141     else if (operation == INTEL_VP9_ENC_MBENC)
142     {
143         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
144     }
145     else if (operation == INTEL_VP9_ENC_DYS)
146     {
147         pcurr_header = &pkh_table->VP9_DYS;
148     }
149     else if (operation == INTEL_VP9_ENC_BRC)
150     {
151         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
152     }
153     else
154     {
155         return false;
156     }
157
158     pcurr_header += krnstate_idx;
159     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
160
161     pnext_header = (pcurr_header + 1);
162     if (pnext_header < pinvalid_entry)
163     {
164         next_krnoffset = pnext_header->kernel_start_pointer << 6;
165     }
166     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
167
168     return true;
169 }
170
171
172 static void
173 gen9_free_surfaces_vp9(void **data)
174 {
175     struct gen9_surface_vp9 *vp9_surface;
176
177     if (!data || !*data)
178         return;
179
180     vp9_surface = *data;
181
182     if (vp9_surface->scaled_4x_surface_obj) {
183         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
184         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
185         vp9_surface->scaled_4x_surface_obj = NULL;
186     }
187
188     if (vp9_surface->scaled_16x_surface_obj) {
189         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
190         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
191         vp9_surface->scaled_16x_surface_obj = NULL;
192     }
193
194     if (vp9_surface->dys_4x_surface_obj) {
195         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
196         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
197         vp9_surface->dys_4x_surface_obj = NULL;
198     }
199
200     if (vp9_surface->dys_16x_surface_obj) {
201         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
202         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
203         vp9_surface->dys_16x_surface_obj = NULL;
204     }
205
206     if (vp9_surface->dys_surface_obj) {
207         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
208         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
209         vp9_surface->dys_surface_obj = NULL;
210     }
211
212     free(vp9_surface);
213
214     *data = NULL;
215
216     return;
217 }
218
219 static VAStatus
220 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
221                              struct object_surface *obj_surface,
222                              struct vp9_surface_param *surface_param)
223 {
224     struct i965_driver_data *i965 = i965_driver_data(ctx);
225     struct gen9_surface_vp9 *vp9_surface;
226     int downscaled_width_4x, downscaled_height_4x;
227     int downscaled_width_16x, downscaled_height_16x;
228
229     if (!obj_surface || !obj_surface->bo)
230         return VA_STATUS_ERROR_INVALID_SURFACE;
231
232     if (obj_surface->private_data &&
233         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
234         obj_surface->free_private_data(&obj_surface->private_data);
235         obj_surface->private_data = NULL;
236     }
237
238     if (obj_surface->private_data) {
239         /* if the frame width/height is already the same as the expected,
240          * it is unncessary to reallocate it.
241          */
242         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
243         if (vp9_surface->frame_width >= surface_param->frame_width ||
244             vp9_surface->frame_height >= surface_param->frame_height)
245             return VA_STATUS_SUCCESS;
246
247         obj_surface->free_private_data(&obj_surface->private_data);
248         obj_surface->private_data = NULL;
249         vp9_surface = NULL;
250     }
251
252     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
253
254     if (!vp9_surface)
255         return VA_STATUS_ERROR_ALLOCATION_FAILED;
256
257     vp9_surface->ctx = ctx;
258     obj_surface->private_data = vp9_surface;
259     obj_surface->free_private_data = gen9_free_surfaces_vp9;
260
261     vp9_surface->frame_width = surface_param->frame_width;
262     vp9_surface->frame_height = surface_param->frame_height;
263
264     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
265     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
266
267     i965_CreateSurfaces(ctx,
268                         downscaled_width_4x,
269                         downscaled_height_4x,
270                         VA_RT_FORMAT_YUV420,
271                         1,
272                         &vp9_surface->scaled_4x_surface_id);
273
274     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
275
276     if (!vp9_surface->scaled_4x_surface_obj) {
277         return VA_STATUS_ERROR_ALLOCATION_FAILED;
278     }
279
280     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
281                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
282
283     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
284     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
285     i965_CreateSurfaces(ctx,
286                         downscaled_width_16x,
287                         downscaled_height_16x,
288                         VA_RT_FORMAT_YUV420,
289                         1,
290                         &vp9_surface->scaled_16x_surface_id);
291     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
292
293     if (!vp9_surface->scaled_16x_surface_obj) {
294         return VA_STATUS_ERROR_ALLOCATION_FAILED;
295     }
296
297     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
298                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
299
300     return VA_STATUS_SUCCESS;
301 }
302
303 static VAStatus
304 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
305                             struct object_surface *obj_surface,
306                             struct vp9_surface_param *surface_param)
307 {
308     struct i965_driver_data *i965 = i965_driver_data(ctx);
309     struct gen9_surface_vp9 *vp9_surface;
310     int dys_width_4x, dys_height_4x;
311     int dys_width_16x, dys_height_16x;
312
313     /* As this is handled after the surface checking, it is unnecessary
314      * to check the surface bo and vp9_priv_surface again
315      */
316
317     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
318
319     if (!vp9_surface)
320         return VA_STATUS_ERROR_INVALID_SURFACE;
321
322     /* if the frame_width/height of dys_surface is the same as
323      * the expected, it is unnecessary to allocate it again
324      */
325     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
326         vp9_surface->dys_frame_width == surface_param->frame_width)
327         return VA_STATUS_SUCCESS;
328
329     if (vp9_surface->dys_4x_surface_obj) {
330         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
331         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
332         vp9_surface->dys_4x_surface_obj = NULL;
333     }
334
335     if (vp9_surface->dys_16x_surface_obj) {
336         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
337         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
338         vp9_surface->dys_16x_surface_obj = NULL;
339     }
340
341     if (vp9_surface->dys_surface_obj) {
342         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
343         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
344         vp9_surface->dys_surface_obj = NULL;
345     }
346
347     vp9_surface->dys_frame_width = surface_param->frame_width;
348     vp9_surface->dys_frame_height = surface_param->frame_height;
349
350     i965_CreateSurfaces(ctx,
351                         surface_param->frame_width,
352                         surface_param->frame_height,
353                         VA_RT_FORMAT_YUV420,
354                         1,
355                         &vp9_surface->dys_surface_id);
356     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
357
358     if (!vp9_surface->dys_surface_obj) {
359         return VA_STATUS_ERROR_ALLOCATION_FAILED;
360     }
361
362     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
363                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
364
365     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
366     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
367
368     i965_CreateSurfaces(ctx,
369                         dys_width_4x,
370                         dys_height_4x,
371                         VA_RT_FORMAT_YUV420,
372                         1,
373                         &vp9_surface->dys_4x_surface_id);
374
375     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
376
377     if (!vp9_surface->dys_4x_surface_obj) {
378         return VA_STATUS_ERROR_ALLOCATION_FAILED;
379     }
380
381     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
382                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
383
384     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
385     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
386     i965_CreateSurfaces(ctx,
387                         dys_width_16x,
388                         dys_height_16x,
389                         VA_RT_FORMAT_YUV420,
390                         1,
391                         &vp9_surface->dys_16x_surface_id);
392     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
393
394     if (!vp9_surface->dys_16x_surface_obj) {
395         return VA_STATUS_ERROR_ALLOCATION_FAILED;
396     }
397
398     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
399                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
400
401     return VA_STATUS_SUCCESS;
402 }
403
404 static VAStatus
405 gen9_vp9_allocate_resources(VADriverContextP ctx,
406                             struct encode_state *encode_state,
407                             struct intel_encoder_context *encoder_context,
408                             int allocate)
409 {
410     struct i965_driver_data *i965 = i965_driver_data(ctx);
411     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
412     struct gen9_vp9_state *vp9_state;
413     int allocate_flag, i;
414     int res_size;
415     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
416     unsigned int width, height;
417
418     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
419
420     if (!vp9_state || !vp9_state->pic_param)
421         return VA_STATUS_ERROR_INVALID_PARAMETER;
422
423     /* the buffer related with BRC is not changed. So it is allocated
424      * based on the input parameter
425      */
426     if (allocate) {
427         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
428         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
429         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
430         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
431         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
432         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
433         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
434         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
435         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
436         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
437         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
438
439         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
440                                  &vme_context->res_brc_history_buffer,
441                                  VP9_BRC_HISTORY_BUFFER_SIZE,
442                                  "Brc History buffer");
443         if (!allocate_flag)
444             goto failed_allocation;
445         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446                                  &vme_context->res_brc_const_data_buffer,
447                                  VP9_BRC_CONSTANTSURFACE_SIZE,
448                                  "Brc Constant buffer");
449         if (!allocate_flag)
450             goto failed_allocation;
451
452         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
453            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
454         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455                                  &vme_context->res_brc_mbenc_curbe_write_buffer,
456                                  res_size,
457                                  "Brc Curbe write");
458         if (!allocate_flag)
459             goto failed_allocation;
460
461         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463                                  &vme_context->res_pic_state_brc_read_buffer,
464                                  res_size,
465                                  "Pic State Brc_read");
466         if (!allocate_flag)
467             goto failed_allocation;
468
469         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
470         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471                                  &vme_context->res_pic_state_brc_write_hfw_read_buffer,
472                                  res_size,
473                                  "Pic State Brc_write Hfw_Read");
474         if (!allocate_flag)
475             goto failed_allocation;
476
477         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
478         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479                                  &vme_context->res_pic_state_hfw_write_buffer,
480                                  res_size,
481                                  "Pic State Hfw Write");
482         if (!allocate_flag)
483             goto failed_allocation;
484
485         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
486         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487                                  &vme_context->res_seg_state_brc_read_buffer,
488                                  res_size,
489                                  "Segment state brc_read");
490         if (!allocate_flag)
491             goto failed_allocation;
492
493         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
494         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495                                  &vme_context->res_seg_state_brc_write_buffer,
496                                  res_size,
497                                  "Segment state brc_write");
498         if (!allocate_flag)
499             goto failed_allocation;
500
501         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
502         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503                                  &vme_context->res_brc_bitstream_size_buffer,
504                                  res_size,
505                                  "Brc bitstream buffer");
506         if (!allocate_flag)
507             goto failed_allocation;
508
509         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
510         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
511                                  &vme_context->res_brc_hfw_data_buffer,
512                                  res_size,
513                                  "mfw Brc data");
514         if (!allocate_flag)
515             goto failed_allocation;
516
517         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
518         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
519                                  &vme_context->res_brc_mmdk_pak_buffer,
520                                  res_size,
521                                  "Brc mmdk_pak");
522         if (!allocate_flag)
523             goto failed_allocation;
524     }
525
526     /* If the width/height of allocated buffer is greater than the expected,
527      * it is unnecessary to allocate it again
528      */
529     if (vp9_state->res_width >= vp9_state->frame_width &&
530         vp9_state->res_height >= vp9_state->frame_height) {
531
532         return VA_STATUS_SUCCESS;
533     }
534     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
535     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
536     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
537
538     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
539     res_size = frame_width_in_sb * 64;
540     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541                                  &vme_context->res_hvd_line_buffer,
542                                  res_size,
543                                  "VP9 hvd line line");
544     if (!allocate_flag)
545         goto failed_allocation;
546
547     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
548     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
549                                  &vme_context->res_hvd_tile_line_buffer,
550                                  res_size,
551                                  "VP9 hvd tile_line line");
552     if (!allocate_flag)
553         goto failed_allocation;
554
555     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
556     res_size = frame_width_in_sb * 18 * 64;
557     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
558                                  &vme_context->res_deblocking_filter_line_buffer,
559                                  res_size,
560                                  "VP9 deblocking filter line");
561     if (!allocate_flag)
562         goto failed_allocation;
563
564     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
565     res_size = frame_width_in_sb * 18 * 64;
566     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
567                                  &vme_context->res_deblocking_filter_tile_line_buffer,
568                                  res_size,
569                                  "VP9 deblocking tile line");
570     if (!allocate_flag)
571         goto failed_allocation;
572
573     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
574     res_size = frame_height_in_sb * 17 * 64;
575     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
576                                  &vme_context->res_deblocking_filter_tile_col_buffer,
577                                  res_size,
578                                  "VP9 deblocking tile col");
579     if (!allocate_flag)
580         goto failed_allocation;
581
582     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
583     res_size = frame_width_in_sb * 5 * 64;
584     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
585                                  &vme_context->res_metadata_line_buffer,
586                                  res_size,
587                                  "VP9 metadata line");
588     if (!allocate_flag)
589         goto failed_allocation;
590
591     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
592     res_size = frame_width_in_sb * 5 * 64;
593     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
594                                  &vme_context->res_metadata_tile_line_buffer,
595                                  res_size,
596                                  "VP9 metadata tile line");
597     if (!allocate_flag)
598         goto failed_allocation;
599
600     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
601     res_size = frame_height_in_sb * 5 * 64;
602     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
603                                  &vme_context->res_metadata_tile_col_buffer,
604                                  res_size,
605                                  "VP9 metadata tile col");
606     if (!allocate_flag)
607         goto failed_allocation;
608
609     i965_free_gpe_resource(&vme_context->res_prob_buffer);
610     res_size = 2048;
611     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
612                                  &vme_context->res_prob_buffer,
613                                  res_size,
614                                  "VP9 prob");
615     if (!allocate_flag)
616         goto failed_allocation;
617
618     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
619     res_size = frame_sb_num * 64;
620     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
621                                  &vme_context->res_segmentid_buffer,
622                                  res_size,
623                                  "VP9 segment id");
624     if (!allocate_flag)
625         goto failed_allocation;
626
627     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
628
629     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
630     res_size = 29 * 64;
631     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
632                                  &vme_context->res_prob_delta_buffer,
633                                  res_size,
634                                  "VP9 prob delta");
635     if (!allocate_flag)
636         goto failed_allocation;
637
638     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
639
640     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
641     res_size = 29 * 64;
642     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
643                                  &vme_context->res_prob_delta_buffer,
644                                  res_size,
645                                  "VP9 prob delta");
646     if (!allocate_flag)
647         goto failed_allocation;
648
649     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
650     res_size = 32 * 64;
651     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
652                                  &vme_context->res_compressed_input_buffer,
653                                  res_size,
654                                  "VP9 compressed_input buffer");
655     if (!allocate_flag)
656         goto failed_allocation;
657
658     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
659     res_size = 193 * 64;
660     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
661                                  &vme_context->res_prob_counter_buffer,
662                                  res_size,
663                                  "VP9 prob counter");
664     if (!allocate_flag)
665         goto failed_allocation;
666
667     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
668     res_size = frame_sb_num * 64;
669     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
670                                  &vme_context->res_tile_record_streamout_buffer,
671                                  res_size,
672                                  "VP9 tile record stream_out");
673     if (!allocate_flag)
674         goto failed_allocation;
675
676     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
677     res_size = frame_sb_num * 64;
678     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
679                                  &vme_context->res_cu_stat_streamout_buffer,
680                                  res_size,
681                                  "VP9 CU stat stream_out");
682     if (!allocate_flag)
683         goto failed_allocation;
684
685     width = vp9_state->downscaled_width_4x_in_mb * 32;
686     height = vp9_state->downscaled_height_4x_in_mb * 16;
687     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
688     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
689                                  &vme_context->s4x_memv_data_buffer,
690                                  width, height,
691                                  ALIGN(width, 64),
692                                  "VP9 4x MEMV data");
693     if (!allocate_flag)
694         goto failed_allocation;
695
696     width = vp9_state->downscaled_width_4x_in_mb * 8;
697     height = vp9_state->downscaled_height_4x_in_mb * 16;
698     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
699     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
700                                  &vme_context->s4x_memv_distortion_buffer,
701                                  width, height,
702                                  ALIGN(width, 64),
703                                  "VP9 4x MEMV distorion");
704     if (!allocate_flag)
705         goto failed_allocation;
706
707     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
708     height = vp9_state->downscaled_height_16x_in_mb * 16;
709     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
710     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
711                                  &vme_context->s16x_memv_data_buffer,
712                                  width, height,
713                                  width,
714                                  "VP9 16x MEMV data");
715     if (!allocate_flag)
716         goto failed_allocation;
717
718     width = vp9_state->frame_width_in_mb * 16;
719     height = vp9_state->frame_height_in_mb * 8;
720     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
721     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
722                                  &vme_context->res_output_16x16_inter_modes,
723                                  width, height,
724                                  ALIGN(width, 64),
725                                  "VP9 output inter_mode");
726     if (!allocate_flag)
727         goto failed_allocation;
728
729     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
730                16 * 4;
731     for (i = 0; i < 2; i++) {
732         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
733         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
734                                    &vme_context->res_mode_decision[i],
735                                    res_size,
736                                    "VP9 mode decision");
737         if (!allocate_flag)
738             goto failed_allocation;
739
740     }
741
742     res_size = frame_sb_num * 9 * 64;
743     for (i = 0; i < 2; i++) {
744         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
745         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
746                                    &vme_context->res_mv_temporal_buffer[i],
747                                    res_size,
748                                    "VP9 temporal mv");
749         if (!allocate_flag)
750             goto failed_allocation;
751     }
752
753     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
754     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
755     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
756     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
757                                  &vme_context->res_mb_code_surface,
758                                  ALIGN(res_size, 4096),
759                                  "VP9 mb_code surface");
760     if (!allocate_flag)
761         goto failed_allocation;
762
763     res_size = 128;
764     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
765     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
766                                  &vme_context->res_pak_uncompressed_input_buffer,
767                                  ALIGN(res_size, 4096),
768                                  "VP9 pak_uncompressed_input");
769     if (!allocate_flag)
770         goto failed_allocation;
771
772     if (!vme_context->frame_header_data) {
773         /* allocate 512 bytes for generating the uncompressed header */
774         vme_context->frame_header_data = calloc(1, 512);
775     }
776
777     vp9_state->res_width = vp9_state->frame_width;
778     vp9_state->res_height = vp9_state->frame_height;
779
780     return VA_STATUS_SUCCESS;
781
782 failed_allocation:
783     return VA_STATUS_ERROR_ALLOCATION_FAILED;
784 }
785
786 static void
787 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
788 {
789     int i;
790     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
791
792     if (vp9_state->brc_enabled) {
793         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
794         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
795         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
796         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
797         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
798         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
799         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
800         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
801         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
802         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
803         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
804     }
805
806     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
807     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
808     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
809     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
810     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
811     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
812     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
813     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
814     i965_free_gpe_resource(&vme_context->res_prob_buffer);
815     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
816     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
817     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
818     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
819     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
820     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
821     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
822     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
823     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
824     for (i = 0; i < 2; i++) {
825         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
826     }
827
828     for (i = 0; i < 2; i++) {
829         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
830     }
831
832     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
833     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
834     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
835
836     if (vme_context->frame_header_data) {
837         free(vme_context->frame_header_data);
838         vme_context->frame_header_data = NULL;
839     }
840     return;
841 }
842
843 static void
844 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
845                                         struct vp9_encoder_kernel_walker_parameter *kernel_walker_param,
846                                         struct gpe_media_object_walker_parameter *walker_param)
847 {
848     memset(walker_param, 0, sizeof(*walker_param));
849
850     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
851
852     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
853     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
854
855     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
856     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
857
858     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
859     walker_param->global_outer_loop_stride.y = 0;
860
861     walker_param->global_inner_loop_unit.x = 0;
862     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
863
864     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
865     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
866
867     if (kernel_walker_param->no_dependency) {
868         walker_param->scoreboard_mask = 0;
869         walker_param->use_scoreboard = 0;
870         // Raster scan walking pattern
871         walker_param->local_outer_loop_stride.x = 0;
872         walker_param->local_outer_loop_stride.y = 1;
873         walker_param->local_inner_loop_unit.x = 1;
874         walker_param->local_inner_loop_unit.y = 0;
875         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
876         walker_param->local_end.y = 0;
877     } else {
878         walker_param->local_end.x = 0;
879         walker_param->local_end.y = 0;
880
881         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
882             // 45z degree
883             walker_param->scoreboard_mask = 0x0F;
884
885             walker_param->global_loop_exec_count = 0x3FF;
886             walker_param->local_loop_exec_count = 0x3FF;
887
888             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
889             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
890
891             walker_param->global_start.x = 0;
892             walker_param->global_start.y = 0;
893
894             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
895             walker_param->global_outer_loop_stride.y = 0;
896
897             walker_param->global_inner_loop_unit.x = 0;
898             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
899
900             walker_param->block_resolution.x = walker_param->global_resolution.x;
901             walker_param->block_resolution.y = walker_param->global_resolution.y;
902
903             walker_param->local_start.x = 0;
904             walker_param->local_start.y = 0;
905
906             walker_param->local_outer_loop_stride.x = 1;
907             walker_param->local_outer_loop_stride.y = 0;
908
909             walker_param->local_inner_loop_unit.x = -1;
910             walker_param->local_inner_loop_unit.y = 4;
911
912             walker_param->middle_loop_extra_steps = 3;
913             walker_param->mid_loop_unit_x = 0;
914             walker_param->mid_loop_unit_y = 1;
915         } else {
916             // 26 degree
917             walker_param->scoreboard_mask = 0x0F;
918             walker_param->local_outer_loop_stride.x = 1;
919             walker_param->local_outer_loop_stride.y = 0;
920             walker_param->local_inner_loop_unit.x = -2;
921             walker_param->local_inner_loop_unit.y = 1;
922         }
923     }
924 }
925
926 static void
927 gen9_add_2d_gpe_surface(VADriverContextP ctx,
928                         struct i965_gpe_context *gpe_context,
929                         struct object_surface *obj_surface,
930                         int is_uv_surface,
931                         int is_media_block_rw,
932                         unsigned int format,
933                         int index)
934 {
935     struct i965_gpe_resource gpe_resource;
936     struct i965_gpe_surface gpe_surface;
937
938     memset(&gpe_surface, 0, sizeof(gpe_surface));
939
940     i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
941     gpe_surface.gpe_resource = &gpe_resource;
942     gpe_surface.is_2d_surface = 1;
943     gpe_surface.is_uv_surface = !!is_uv_surface;
944     gpe_surface.is_media_block_rw = !!is_media_block_rw;
945
946     gpe_surface.cacheability_control = DEFAULT_MOCS;
947     gpe_surface.format = format;
948
949     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
950     i965_free_gpe_resource(&gpe_resource);
951 }
952
953 static void
954 gen9_add_adv_gpe_surface(VADriverContextP ctx,
955                          struct i965_gpe_context *gpe_context,
956                          struct object_surface *obj_surface,
957                          int index)
958 {
959     struct i965_gpe_resource gpe_resource;
960     struct i965_gpe_surface gpe_surface;
961
962     memset(&gpe_surface, 0, sizeof(gpe_surface));
963
964     i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
965     gpe_surface.gpe_resource = &gpe_resource;
966     gpe_surface.is_adv_surface = 1;
967     gpe_surface.cacheability_control = DEFAULT_MOCS;
968     gpe_surface.v_direction = 2;
969
970     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
971     i965_free_gpe_resource(&gpe_resource);
972 }
973
974 static void
975 gen9_add_buffer_gpe_surface(VADriverContextP ctx,
976                             struct i965_gpe_context *gpe_context,
977                             struct i965_gpe_resource *gpe_buffer,
978                             int is_raw_buffer,
979                             unsigned int size,
980                             unsigned int offset,
981                             int index)
982 {
983     struct i965_gpe_surface gpe_surface;
984
985     memset(&gpe_surface, 0, sizeof(gpe_surface));
986
987     gpe_surface.gpe_resource = gpe_buffer;
988     gpe_surface.is_buffer = 1;
989     gpe_surface.is_raw_buffer = !!is_raw_buffer;
990     gpe_surface.cacheability_control = DEFAULT_MOCS;
991     gpe_surface.size = size;
992     gpe_surface.offset = offset;
993
994     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
995 }
996
997 static void
998 gen9_add_buffer_2d_gpe_surface(VADriverContextP ctx,
999                                struct i965_gpe_context *gpe_context,
1000                                struct i965_gpe_resource *gpe_buffer,
1001                                int is_media_block_rw,
1002                                unsigned int format,
1003                                int index)
1004 {
1005     struct i965_gpe_surface gpe_surface;
1006
1007     memset(&gpe_surface, 0, sizeof(gpe_surface));
1008
1009     gpe_surface.gpe_resource = gpe_buffer;
1010     gpe_surface.is_2d_surface = 1;
1011     gpe_surface.is_media_block_rw = !!is_media_block_rw;
1012     gpe_surface.cacheability_control = DEFAULT_MOCS;
1013     gpe_surface.format = format;
1014
1015     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
1016 }
1017
1018 static void
1019 gen9_add_dri_buffer_gpe_surface(VADriverContextP ctx,
1020                                 struct i965_gpe_context *gpe_context,
1021                                 dri_bo *bo,
1022                                 int is_raw_buffer,
1023                                 unsigned int size,
1024                                 unsigned int offset,
1025                                 int index)
1026 {
1027     struct i965_gpe_resource gpe_resource;
1028
1029     i965_dri_object_to_buffer_gpe_resource(&gpe_resource, bo);
1030     gen9_add_buffer_gpe_surface(ctx,
1031                                 gpe_context,
1032                                 &gpe_resource,
1033                                 is_raw_buffer,
1034                                 size,
1035                                 offset,
1036                                 index);
1037
1038     i965_free_gpe_resource(&gpe_resource);
1039 }
1040
1041 /*
1042 static void
1043 gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
1044                                    struct i965_gpe_context *gpe_context,
1045                                    dri_bo *bo,
1046                                    unsigned int width,
1047                                    unsigned int height,
1048                                    unsigned int pitch,
1049                                    int is_media_block_rw,
1050                                    unsigned int format,
1051                                    int index)
1052 {
1053     struct i965_gpe_resource gpe_resource;
1054
1055     i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
1056     gen9_add_buffer_2d_gpe_surface(ctx,
1057                                    gpe_context,
1058                                    &gpe_resource,
1059                                    is_media_block_rw,
1060                                    format,
1061                                    index);
1062
1063     i965_free_gpe_resource(&gpe_resource);
1064 }
1065 */
1066
1067 static void
1068 gen9_run_kernel_media_object(VADriverContextP ctx,
1069                              struct intel_encoder_context *encoder_context,
1070                              struct i965_gpe_context *gpe_context,
1071                              int media_function,
1072                              struct gpe_media_object_parameter *param)
1073 {
1074     struct intel_batchbuffer *batch = encoder_context->base.batch;
1075     struct vp9_encode_status_buffer_internal *status_buffer;
1076     struct gen9_vp9_state *vp9_state;
1077     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1078
1079     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1080     if (!vp9_state || !batch)
1081         return;
1082
1083     intel_batchbuffer_start_atomic(batch, 0x1000);
1084
1085     status_buffer = &(vp9_state->status_buffer);
1086     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1087     mi_store_data_imm.bo = status_buffer->bo;
1088     mi_store_data_imm.offset = status_buffer->media_index_offset;
1089     mi_store_data_imm.dw0 = media_function;
1090     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1091
1092     intel_batchbuffer_emit_mi_flush(batch);
1093     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1094     gen8_gpe_media_object(ctx, gpe_context, batch, param);
1095     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1096
1097     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1098
1099     intel_batchbuffer_end_atomic(batch);
1100
1101     intel_batchbuffer_flush(batch);
1102 }
1103
1104 static void
1105 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
1106                                     struct intel_encoder_context *encoder_context,
1107                                     struct i965_gpe_context *gpe_context,
1108                                     int media_function,
1109                                     struct gpe_media_object_walker_parameter *param)
1110 {
1111     struct intel_batchbuffer *batch = encoder_context->base.batch;
1112     struct vp9_encode_status_buffer_internal *status_buffer;
1113     struct gen9_vp9_state *vp9_state;
1114     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1115
1116     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1117     if (!vp9_state || !batch)
1118         return;
1119
1120     intel_batchbuffer_start_atomic(batch, 0x1000);
1121
1122     intel_batchbuffer_emit_mi_flush(batch);
1123
1124     status_buffer = &(vp9_state->status_buffer);
1125     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1126     mi_store_data_imm.bo = status_buffer->bo;
1127     mi_store_data_imm.offset = status_buffer->media_index_offset;
1128     mi_store_data_imm.dw0 = media_function;
1129     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1130
1131     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1132     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
1133     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1134
1135     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1136
1137     intel_batchbuffer_end_atomic(batch);
1138
1139     intel_batchbuffer_flush(batch);
1140 }
1141
1142 static
1143 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
1144                             struct encode_state *encode_state,
1145                             struct i965_gpe_context *gpe_context,
1146                             struct intel_encoder_context *encoder_context,
1147                             struct gen9_vp9_brc_curbe_param *param)
1148 {
1149     VAEncSequenceParameterBufferVP9 *seq_param;
1150     VAEncPictureParameterBufferVP9  *pic_param;
1151     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
1152     vp9_brc_curbe_data      *cmd;
1153     double                  dbps_ratio, dInputBitsPerFrame;
1154     struct gen9_vp9_state *vp9_state;
1155
1156     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1157
1158     pic_param      = param->ppic_param;
1159     seq_param      = param->pseq_param;
1160     segment_param  = param->psegment_param;
1161
1162     cmd = i965_gpe_context_map_curbe(gpe_context);
1163
1164     if (!cmd)
1165         return;
1166
1167     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1168
1169     if (!vp9_state->dys_enabled)
1170     {
1171         cmd->dw0.frame_width  = pic_param->frame_width_src;
1172         cmd->dw0.frame_height = pic_param->frame_height_src;
1173     }
1174     else
1175     {
1176         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1177         cmd->dw0.frame_height = pic_param->frame_height_dst;
1178     }
1179
1180     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1181     cmd->dw1.segmentation_enable  = 0;
1182     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1183     cmd->dw1.num_tlevels          = 1;
1184
1185     switch(param->media_state_type)
1186     {
1187         case VP9_MEDIA_STATE_BRC_INIT_RESET:
1188         {
1189             cmd->dw3.max_level_ratiot0 = 0;
1190             cmd->dw3.max_level_ratiot1 = 0;
1191             cmd->dw3.max_level_ratiot2 = 0;
1192             cmd->dw3.max_level_ratiot3 = 0;
1193
1194             cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1195                                seq_param->max_frame_height;
1196             cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1197             cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1198             cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1199                                                   VP9_BRC_KBPS;
1200             cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1201                                                   VP9_BRC_KBPS;
1202             cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1203                                                   VP9_BRC_KBPS;
1204             cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1205             cmd->dw11.frame_rated           = vp9_state->framerate.den;
1206
1207             cmd->dw14.avbr_accuracy         = 30;
1208             cmd->dw14.avbr_convergence      = 150;
1209
1210             if (encoder_context->rate_control_mode == VA_RC_CBR)
1211             {
1212                 cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1213                 cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1214                 cmd->dw9.min_bit_rate  = 0;
1215             }
1216             else if (encoder_context->rate_control_mode == VA_RC_VBR)
1217             {
1218                 cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1219             }
1220             else
1221             {
1222                 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1223                 cmd->dw16.cq_level = 30;
1224             }
1225             cmd->dw12.gopp = seq_param->intra_period - 1;
1226
1227             cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1228             cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1229
1230             cmd->dw15.min_qp          = 0;
1231             cmd->dw15.max_qp          = 255;
1232
1233             cmd->dw16.cq_level            = 30;
1234
1235             cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1236             cmd->dw17.brc_overshoot_cbr_pct = 150;
1237
1238             dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1239             dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1240             if (dbps_ratio < 0.1)
1241                 dbps_ratio = 0.1;
1242             if (dbps_ratio > 3.5)
1243                 dbps_ratio = 3.5;
1244
1245             *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1246             *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1247
1248             cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1249             cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1250             cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1251             cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1252             cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1253             cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1254             cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1255             cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1256
1257             cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1258             cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1259             cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1260             cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1261             cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1262             cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1263             cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1264             cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1265
1266             cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1267             cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1268             cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1269             cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1270             cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1271             cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1272             cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1273             cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1274
1275             break;
1276         }
1277         case VP9_MEDIA_STATE_BRC_UPDATE:
1278         {
1279             cmd->dw15.min_qp          = 0;
1280             cmd->dw15.max_qp          = 255;
1281
1282             cmd->dw25.frame_number    = param->frame_number;
1283
1284             // Used in dynamic scaling. set to zero for now
1285             cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1286             cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1287
1288             if (pic_param->pic_flags.bits.segmentation_enabled) {
1289                 cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1290                 cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1291                 cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1292                 cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1293
1294                 cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1295                 cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1296                 cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1297                 cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1298             }
1299
1300             //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1301             cmd->dw34.temporal_id                = 0;
1302             cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1303
1304             cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1305             cmd->dw35.sync_async                 = 0;
1306             cmd->dw35.mbrc                       = param->mbbrc_enabled;
1307             if (*param->pbrc_init_current_target_buf_full_in_bits >
1308                  ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1309                 *param->pbrc_init_current_target_buf_full_in_bits -=
1310                      (double)(*param->pbrc_init_reset_buf_size_in_bits);
1311                 cmd->dw35.overflow = 1;
1312             }
1313             else
1314                 cmd->dw35.overflow = 0;
1315
1316             cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1317
1318             cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1319
1320             *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1321
1322             cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1323             cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1324             cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1325
1326             break;
1327         }
1328         case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1329             cmd->dw2.intra_mode_disable        = 0;
1330             break;
1331         default:
1332             break;
1333     }
1334
1335     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1336     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1337     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1338     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1339     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1340     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1341     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1342     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1343     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1344     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1345     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1346     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1347     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1348     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1349
1350     i965_gpe_context_unmap_curbe(gpe_context);
1351     return;
1352 }
1353
1354 static void
1355 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1356                                      struct encode_state *encode_state,
1357                                      struct intel_encoder_context *encoder_context,
1358                                      struct i965_gpe_context *gpe_context)
1359 {
1360     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1361
1362     gen9_add_buffer_gpe_surface(ctx,
1363                                 gpe_context,
1364                                 &vme_context->res_brc_history_buffer,
1365                                 0,
1366                                 vme_context->res_brc_history_buffer.size,
1367                                 0,
1368                                 VP9_BTI_BRC_HISTORY_G9);
1369
1370     gen9_add_buffer_2d_gpe_surface(ctx,
1371                                    gpe_context,
1372                                    &vme_context->s4x_memv_distortion_buffer,
1373                                    1,
1374                                    I965_SURFACEFORMAT_R8_UNORM,
1375                                    VP9_BTI_BRC_DISTORTION_G9);
1376 }
1377
1378 /* The function related with BRC */
1379 static VAStatus
1380 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1381                                struct encode_state *encode_state,
1382                                struct intel_encoder_context *encoder_context)
1383 {
1384     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1385     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1386     struct gpe_media_object_parameter media_object_param;
1387     struct i965_gpe_context *gpe_context;
1388     int gpe_index = VP9_BRC_INIT;
1389     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1390     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1391     VAEncPictureParameterBufferVP9 *pic_param;
1392     struct gen9_vp9_state *vp9_state;
1393
1394     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1395
1396     if (!vp9_state || !vp9_state->pic_param)
1397         return VA_STATUS_ERROR_INVALID_PARAMETER;
1398
1399     pic_param = vp9_state->pic_param;
1400
1401     if (vp9_state->brc_inited)
1402         gpe_index = VP9_BRC_RESET;
1403
1404     gpe_context = &brc_context->gpe_contexts[gpe_index];
1405
1406     gen8_gpe_context_init(ctx, gpe_context);
1407     gen9_gpe_reset_binding_table(ctx, gpe_context);
1408
1409     brc_initreset_curbe.media_state_type    = media_function;
1410     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1411     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1412     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1413     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1414     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1415     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1416     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1417                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1418     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1419                           &vp9_state->brc_init_reset_buf_size_in_bits;
1420     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1421                           &vp9_state->brc_init_reset_input_bits_per_frame;
1422     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1423     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1424     brc_initreset_curbe.mbbrc_enabled      = 0;
1425     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1426
1427     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1428                                    gpe_context,
1429                                    encoder_context,
1430                                    &brc_initreset_curbe);
1431
1432     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1433     gen8_gpe_setup_interface_data(ctx, gpe_context);
1434
1435     memset(&media_object_param, 0, sizeof(media_object_param));
1436     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1437
1438     return VA_STATUS_SUCCESS;
1439 }
1440
1441 static void
1442 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1443                                      struct encode_state *encode_state,
1444                                      struct intel_encoder_context *encoder_context,
1445                                      struct i965_gpe_context *gpe_context)
1446 {
1447     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1448
1449     struct object_surface *obj_surface;
1450     struct gen9_surface_vp9 *vp9_priv_surface;
1451
1452     /* sScaled4xSurface surface */
1453     obj_surface = encode_state->reconstructed_object;
1454
1455     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1456
1457     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1458     gen9_add_2d_gpe_surface(ctx, gpe_context,
1459                             obj_surface,
1460                             0, 1,
1461                             I965_SURFACEFORMAT_R8_UNORM,
1462                             VP9_BTI_BRC_SRCY4X_G9
1463                             );
1464
1465     gen9_add_adv_gpe_surface(ctx, gpe_context,
1466                              obj_surface,
1467                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1468
1469     gen9_add_buffer_2d_gpe_surface(ctx,
1470                                    gpe_context,
1471                                    &vme_context->s4x_memv_distortion_buffer,
1472                                    1,
1473                                    I965_SURFACEFORMAT_R8_UNORM,
1474                                    VP9_BTI_BRC_DISTORTION_G9);
1475
1476      return;
1477 }
1478
1479 /* The function related with BRC */
1480 static VAStatus
1481 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1482                                struct encode_state *encode_state,
1483                                struct intel_encoder_context *encoder_context)
1484 {
1485     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1486     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1487     struct i965_gpe_context *gpe_context;
1488     int gpe_index = VP9_BRC_INTRA_DIST;
1489     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1490     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1491     VAEncPictureParameterBufferVP9 *pic_param;
1492     struct gen9_vp9_state *vp9_state;
1493     struct gpe_media_object_walker_parameter media_object_walker_param;
1494     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
1495
1496     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1497
1498     if (!vp9_state || !vp9_state->pic_param)
1499         return VA_STATUS_ERROR_INVALID_PARAMETER;
1500
1501     pic_param = vp9_state->pic_param;
1502
1503     gpe_context = &brc_context->gpe_contexts[gpe_index];
1504
1505     gen8_gpe_context_init(ctx, gpe_context);
1506     gen9_gpe_reset_binding_table(ctx, gpe_context);
1507
1508     brc_intra_dist_curbe.media_state_type    = media_function;
1509     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1510     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1511     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1512     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1513     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1514     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1515     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1516                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1517     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1518                           &vp9_state->brc_init_reset_buf_size_in_bits;
1519     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1520                           &vp9_state->brc_init_reset_input_bits_per_frame;
1521     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1522     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1523     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1524     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1525
1526     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1527                                    gpe_context,
1528                                    encoder_context,
1529                                    &brc_intra_dist_curbe);
1530
1531     /* zero distortion buffer */
1532     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1533
1534     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1535     gen8_gpe_setup_interface_data(ctx, gpe_context);
1536
1537     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1538     kernel_walker_param.resolution_x = vme_context->downscaled_width_in_mb4x;
1539     kernel_walker_param.resolution_y = vme_context->downscaled_height_in_mb4x;
1540     kernel_walker_param.no_dependency = 1;
1541
1542     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1543
1544     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1545                                         gpe_context,
1546                                         media_function,
1547                                         &media_object_walker_param);
1548
1549     return VA_STATUS_SUCCESS;
1550 }
1551
1552 static void
1553 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1554                                             struct encode_state *encode_state,
1555                                             struct intel_encoder_context *encoder_context,
1556                                             struct i965_gpe_resource *gpe_resource)
1557 {
1558     struct gen9_vp9_state *vp9_state;
1559     VAEncPictureParameterBufferVP9 *pic_param;
1560     int frame_width_minus1, frame_height_minus1;
1561     int is_lossless = 0;
1562     int is_intra_only = 0;
1563     unsigned int last_frame_type;
1564     unsigned int ref_flags;
1565     unsigned int use_prev_frame_mvs, adapt_flag;
1566     struct gen9_surface_vp9 *vp9_surface = NULL;
1567     struct object_surface *obj_surface = NULL;
1568     uint32_t scale_h = 0;
1569     uint32_t scale_w = 0;
1570
1571     char *pdata;
1572     int i, j;
1573     unsigned int *cmd_ptr, cmd_value, tmp;
1574
1575     pdata = i965_map_gpe_resource(gpe_resource);
1576     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1577
1578     if (!vp9_state || !vp9_state->pic_param || !pdata)
1579         return;
1580
1581     pic_param = vp9_state->pic_param;
1582     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1583     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1584     if ((pic_param->luma_ac_qindex == 0) &&
1585         (pic_param->luma_dc_qindex_delta == 0) &&
1586         (pic_param->chroma_ac_qindex_delta == 0) &&
1587         (pic_param->chroma_dc_qindex_delta == 0))
1588         is_lossless = 1;
1589
1590     if (pic_param->pic_flags.bits.frame_type)
1591         is_intra_only = pic_param->pic_flags.bits.intra_only;
1592
1593     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1594
1595     use_prev_frame_mvs = 0;
1596     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1597         last_frame_type = 0;
1598         ref_flags = 0;
1599     } else {
1600         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1601                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1602                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1603                     );
1604         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1605             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1606             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1607             !pic_param->pic_flags.bits.intra_only &&
1608             vp9_state->vp9_last_frame.show_frame &&
1609             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1610              !vp9_state->vp9_last_frame.intra_only)
1611            )
1612             use_prev_frame_mvs = 1;
1613     }
1614     adapt_flag = 0;
1615     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1616         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1617         adapt_flag = 1;
1618
1619     for (i = 0; i < 4; i++) {
1620         uint32_t non_first_pass;
1621         non_first_pass = 1;
1622         if (i == 0)
1623             non_first_pass = 0;
1624
1625         cmd_ptr =(unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1626
1627         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1628         *cmd_ptr++ = (frame_height_minus1 << 16 |
1629                       frame_width_minus1);
1630         /* dw2 */
1631         *cmd_ptr++ = ( 0 << 31 | /* disable segment_in */
1632                        0 << 30 | /* disable segment_out */
1633                        is_lossless << 29 | /* loseless */
1634                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1635                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1636                        (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1637                        (pic_param->sharpness_level << 23) |
1638                        (pic_param->filter_level << 17) |
1639                        (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1640                        (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1641                        (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1642                        (last_frame_type << 13) |
1643                        (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1644                        (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1645                        (use_prev_frame_mvs) << 10 |
1646                        ref_flags |
1647                        (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1648                        (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1649                        (is_intra_only << 2) |
1650                        (adapt_flag << 1) |
1651                        (pic_param->pic_flags.bits.frame_type) << 0);
1652
1653         *cmd_ptr++ =((0 << 28) | /* VP9Profile0 */
1654                      (0 << 24) | /* 8-bit depth */
1655                      (0 << 22) | /* only 420 format */
1656                      (0 << 0)  | /* sse statistics */
1657                      (pic_param->log2_tile_rows << 8) |
1658                      (pic_param->log2_tile_columns << 0));
1659
1660         /* dw4..6 */
1661         if (pic_param->pic_flags.bits.frame_type &&
1662             !pic_param->pic_flags.bits.intra_only) {
1663             for (j = 0; j < 3; j++) {
1664                 obj_surface = encode_state->reference_objects[j];
1665                 scale_w = 0;
1666                 scale_h = 0;
1667                 if (obj_surface && obj_surface->private_data) {
1668                     vp9_surface = obj_surface->private_data;
1669                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1670                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1671                     *cmd_ptr++ = (scale_w << 16 |
1672                                   scale_h);
1673                 } else
1674                     *cmd_ptr++ = 0;
1675             }
1676         } else {
1677             *cmd_ptr++ = 0;
1678             *cmd_ptr++ = 0;
1679             *cmd_ptr++ = 0;
1680         }
1681         /* dw7..9 */
1682         for(j = 0; j < 3; j++) {
1683             obj_surface = encode_state->reference_objects[j];
1684             vp9_surface = NULL;
1685
1686             if (obj_surface && obj_surface->private_data) {
1687                 vp9_surface = obj_surface->private_data;
1688                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1689                              (vp9_surface->frame_width - 1);
1690             } else
1691                 *cmd_ptr++ = 0;
1692         }
1693         /* dw10 */
1694         *cmd_ptr++ = 0;
1695         /* dw11 */
1696         *cmd_ptr++ = (1 << 1);
1697         *cmd_ptr++ = 0;
1698
1699         /* dw13 */
1700         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1701                       (0 << 24) | /* tail insertation */
1702                       (pic_param->luma_ac_qindex << 16) |
1703                       0 /* compressed header bin count */);
1704
1705         /* dw14 */
1706         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1707         cmd_value = (tmp << 16);
1708         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1709         cmd_value |= (tmp << 8);
1710         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1711         cmd_value |= tmp;
1712         *cmd_ptr++ = cmd_value;
1713
1714         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1715         cmd_value = tmp;
1716         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1717         cmd_value |= (tmp << 8);
1718         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1719         cmd_value |= (tmp << 16);
1720         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1721         cmd_value |= (tmp << 24);
1722         *cmd_ptr++ = cmd_value;
1723
1724         /* dw16 */
1725         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1726         cmd_value = tmp;
1727         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1728         cmd_value |= (tmp << 8);
1729         *cmd_ptr++ = cmd_value;
1730
1731         /* dw17 */
1732         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1733                       (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1734         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1735                       (vp9_state->frame_header.bit_offset_lf_level << 16);
1736
1737         /* dw19 */
1738         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1739                       non_first_pass << 16);
1740         /* dw20 */
1741         *cmd_ptr++ = (1 << 31) | (256);
1742
1743         /* dw21 */
1744         *cmd_ptr++ = (0 << 31) | 1;
1745
1746         /* dw22-dw24. Frame_delta_qindex_range */
1747         *cmd_ptr++ = 0;
1748         *cmd_ptr++ = 0;
1749         *cmd_ptr++ = 0;
1750
1751         /* dw25-26. frame_delta_lf_range */
1752         *cmd_ptr++ = 0;
1753         *cmd_ptr++ = 0;
1754
1755         /* dw27. frame_delta_lf_min */
1756         *cmd_ptr++ = 0;
1757
1758         /* dw28..30 */
1759         *cmd_ptr++ = 0;
1760         *cmd_ptr++ = 0;
1761         *cmd_ptr++ = 0;
1762
1763         /* dw31 */
1764         *cmd_ptr++ = (0 << 30) | 1;
1765         /* dw32 */
1766         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1767
1768         *cmd_ptr++ = 0;
1769         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1770     }
1771
1772     i965_unmap_gpe_resource(gpe_resource);
1773 }
1774
1775 static void
1776 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1777                                      struct encode_state *encode_state,
1778                                      struct intel_encoder_context *encoder_context,
1779                                      struct i965_gpe_context *brc_gpe_context,
1780                                      struct i965_gpe_context *mbenc_gpe_context)
1781 {
1782     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1783
1784     /* 0. BRC history buffer */
1785     gen9_add_buffer_gpe_surface(ctx,
1786                                 brc_gpe_context,
1787                                 &vme_context->res_brc_history_buffer,
1788                                 0,
1789                                 vme_context->res_brc_history_buffer.size,
1790                                 0,
1791                                 VP9_BTI_BRC_HISTORY_G9);
1792
1793     /* 1. Constant data buffer */
1794     gen9_add_buffer_gpe_surface(ctx,
1795                                 brc_gpe_context,
1796                                 &vme_context->res_brc_const_data_buffer,
1797                                 0,
1798                                 vme_context->res_brc_const_data_buffer.size,
1799                                 0,
1800                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1801
1802     /* 2. Distortion 2D surface buffer */
1803     gen9_add_buffer_2d_gpe_surface(ctx,
1804                                    brc_gpe_context,
1805                                    &vme_context->s4x_memv_distortion_buffer,
1806                                    1,
1807                                    I965_SURFACEFORMAT_R8_UNORM,
1808                                    VP9_BTI_BRC_DISTORTION_G9);
1809
1810     /* 3. pak buffer */
1811     gen9_add_buffer_gpe_surface(ctx,
1812                                 brc_gpe_context,
1813                                 &vme_context->res_brc_mmdk_pak_buffer,
1814                                 0,
1815                                 vme_context->res_brc_mmdk_pak_buffer.size,
1816                                 0,
1817                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1818     /* 4. Mbenc curbe input buffer */
1819     gen9_add_dri_buffer_gpe_surface(ctx,
1820                                     brc_gpe_context,
1821                                     mbenc_gpe_context->curbe.bo,
1822                                     0,
1823                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1824                                     mbenc_gpe_context->curbe.offset,
1825                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1826     /* 5. Mbenc curbe output buffer */
1827     gen9_add_dri_buffer_gpe_surface(ctx,
1828                                     brc_gpe_context,
1829                                     mbenc_gpe_context->curbe.bo,
1830                                     0,
1831                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1832                                     mbenc_gpe_context->curbe.offset,
1833                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1834
1835     /* 6. BRC_PIC_STATE read buffer */
1836     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1837                                 &vme_context->res_pic_state_brc_read_buffer,
1838                                 0,
1839                                 vme_context->res_pic_state_brc_read_buffer.size,
1840                                 0,
1841                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1842
1843     /* 7. BRC_PIC_STATE write buffer */
1844     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1845                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1846                                 0,
1847                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1848                                 0,
1849                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1850
1851     /* 8. SEGMENT_STATE read buffer */
1852     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1853                                 &vme_context->res_seg_state_brc_read_buffer,
1854                                 0,
1855                                 vme_context->res_seg_state_brc_read_buffer.size,
1856                                 0,
1857                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1858
1859     /* 9. SEGMENT_STATE write buffer */
1860     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1861                                 &vme_context->res_seg_state_brc_write_buffer,
1862                                 0,
1863                                 vme_context->res_seg_state_brc_write_buffer.size,
1864                                 0,
1865                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1866
1867     /* 10. Bitstream size buffer */
1868     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1869                                 &vme_context->res_brc_bitstream_size_buffer,
1870                                 0,
1871                                 vme_context->res_brc_bitstream_size_buffer.size,
1872                                 0,
1873                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1874
1875     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1876                                 &vme_context->res_brc_hfw_data_buffer,
1877                                 0,
1878                                 vme_context->res_brc_hfw_data_buffer.size,
1879                                 0,
1880                                 VP9_BTI_BRC_HFW_DATA_G9);
1881
1882      return;
1883 }
1884
1885 static VAStatus
1886 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1887                                struct encode_state *encode_state,
1888                                struct intel_encoder_context *encoder_context)
1889 {
1890     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1891     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1892     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1893     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1894     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1895     int mbenc_function;
1896     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1897     VAEncPictureParameterBufferVP9 *pic_param;
1898     struct gen9_vp9_state *vp9_state;
1899     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1900     struct gpe_media_object_parameter media_object_param;
1901
1902     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1903     if (!vp9_state || !vp9_state->pic_param)
1904         return VA_STATUS_ERROR_INVALID_PARAMETER;
1905
1906     pic_param = vp9_state->pic_param;
1907     // Setup VP9 MbEnc Curbe
1908     if (vp9_state->picture_coding_type) {
1909         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1910         mbenc_index = VP9_MBENC_IDX_INTER;
1911     } else {
1912         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1913         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1914     }
1915
1916     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1917
1918     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1919
1920     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1921     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1922     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1923     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1924     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1925     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1926     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1927     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1928     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1929     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1930     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1931     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1932     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1933     mbenc_curbe_param.media_state_type       = mbenc_function;
1934
1935     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1936                                 mbenc_gpe_context,
1937                                 encoder_context,
1938                                 &mbenc_curbe_param);
1939
1940     vp9_state->mbenc_curbe_set_in_brc_update = true;
1941
1942     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1943
1944     gen8_gpe_context_init(ctx, brc_gpe_context);
1945     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1946
1947     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1948
1949     // Setup BRC Update Curbe
1950     brc_update_curbe_param.media_state_type       = media_function;
1951     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1952     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1953     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1954     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1955     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1956     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1957     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1958     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1959     brc_update_curbe_param.b_used_ref             = 1;
1960     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1961     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1962     brc_update_curbe_param.mbbrc_enabled          = 0;
1963     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1964     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1965
1966     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1967                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1968     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1969                           &vp9_state->brc_init_reset_buf_size_in_bits;
1970     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1971                           &vp9_state->brc_init_reset_input_bits_per_frame;
1972
1973     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1974                                    brc_gpe_context,
1975                                    encoder_context,
1976                                    &brc_update_curbe_param);
1977
1978
1979     // Check if the constant data surface is present
1980     if (vp9_state->brc_constant_buffer_supported)
1981     {
1982         char *brc_const_buffer;
1983         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1984
1985         if (!brc_const_buffer)
1986             return VA_STATUS_ERROR_OPERATION_FAILED;
1987
1988         if (vp9_state->picture_coding_type)
1989             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1990                    sizeof(vp9_brc_const_data_p_g9));
1991         else
1992             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1993                    sizeof(vp9_brc_const_data_i_g9));
1994
1995         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1996     }
1997
1998     if (pic_param->pic_flags.bits.segmentation_enabled)
1999     {
2000           //reallocate the vme_state->mb_segment_map_surface
2001           /* this will be added later */
2002     }
2003
2004     {
2005         pic_param->filter_level = 0;
2006         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
2007         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
2008                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
2009     }
2010
2011     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
2012                                      encoder_context,
2013                                      brc_gpe_context,
2014                                      mbenc_gpe_context);
2015
2016     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
2017     memset(&media_object_param, 0, sizeof(media_object_param));
2018     gen9_run_kernel_media_object(ctx, encoder_context,
2019                                  brc_gpe_context,
2020                                  media_function,
2021                                  &media_object_param);
2022     return VA_STATUS_SUCCESS;
2023 }
2024
2025 static
2026 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
2027                             struct encode_state *encode_state,
2028                             struct i965_gpe_context *gpe_context,
2029                             struct intel_encoder_context *encoder_context,
2030                             struct gen9_vp9_me_curbe_param *param)
2031 {
2032     vp9_me_curbe_data        *me_cmd;
2033     int enc_media_state;
2034     int                                       me_mode;
2035     unsigned int                                       width, height;
2036     uint32_t                                  l0_ref_frames;
2037     uint32_t                                  scale_factor;
2038
2039     if (param->b16xme_enabled) {
2040         if (param->use_16x_me)
2041             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
2042         else
2043             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
2044     } else {
2045         me_mode = VP9_ENC_ME4X_ONLY;
2046     }
2047
2048     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
2049         scale_factor = 16;
2050     else
2051         scale_factor = 4;
2052
2053     if (param->use_16x_me)
2054         enc_media_state = VP9_MEDIA_STATE_16X_ME;
2055     else
2056         enc_media_state = VP9_MEDIA_STATE_4X_ME;
2057
2058     me_cmd = i965_gpe_context_map_curbe(gpe_context);
2059
2060     if (!me_cmd)
2061         return;
2062
2063     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
2064
2065     me_cmd->dw1.max_num_mvs           = 0x10;
2066     me_cmd->dw1.bi_weight             = 0x00;
2067
2068     me_cmd->dw2.max_num_su            = 0x39;
2069     me_cmd->dw2.max_len_sp            = 0x39;
2070
2071     me_cmd->dw3.sub_mb_part_mask       = 0x77;
2072     me_cmd->dw3.inter_sad             = 0x00;
2073     me_cmd->dw3.intra_sad            = 0x00;
2074     me_cmd->dw3.bme_disable_fbr      = 0x01;
2075     me_cmd->dw3.sub_pel_mode         = 0x03;
2076
2077     width = param->frame_width / scale_factor;
2078     height = param->frame_height / scale_factor;
2079
2080     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
2081     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
2082
2083     me_cmd->dw5.ref_width            = 0x30;
2084     me_cmd->dw5.ref_height           = 0x28;
2085
2086     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
2087         me_cmd->dw6.write_distortions = 0x01;
2088
2089     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
2090     me_cmd->dw6.super_combine_dist    = 0x5;
2091     me_cmd->dw6.max_vmvr              = 0x7fc;
2092
2093     l0_ref_frames = (param->ref_frame_flag & 0x01) +
2094                     !!(param->ref_frame_flag & 0x02) +
2095                     !!(param->ref_frame_flag & 0x04);
2096     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
2097     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
2098
2099     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
2100     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
2101
2102     me_cmd->dw15.mv_shift_factor        = 0x02;
2103
2104     {
2105         memcpy((void *)((char *)me_cmd + 64),
2106                vp9_diamond_ime_search_path_delta,
2107                sizeof(vp9_diamond_ime_search_path_delta));
2108     }
2109
2110
2111     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
2112     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
2113     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
2114     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
2115     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
2116     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
2117
2118     i965_gpe_context_unmap_curbe(gpe_context);
2119 }
2120
2121 static void
2122 gen9_vp9_send_me_surface(VADriverContextP ctx,
2123                          struct encode_state *encode_state,
2124                          struct i965_gpe_context *gpe_context,
2125                          struct intel_encoder_context *encoder_context,
2126                          struct gen9_vp9_me_surface_param *param)
2127 {
2128     struct i965_driver_data *i965 = i965_driver_data(ctx);
2129     struct object_surface *obj_surface;
2130     struct gen9_surface_vp9 *vp9_priv_surface;
2131     struct object_surface *input_surface;
2132     struct i965_gpe_resource *gpe_resource;
2133     int ref_bti;
2134
2135     obj_surface = SURFACE(param->curr_pic);
2136
2137     if (!obj_surface || !obj_surface->private_data)
2138         return;
2139
2140     vp9_priv_surface = obj_surface->private_data;
2141     if (param->use_16x_me)
2142     {
2143         gpe_resource = param->pres_16x_memv_data_buffer;
2144     }
2145     else
2146     {
2147         gpe_resource = param->pres_4x_memv_data_buffer;
2148     }
2149
2150     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2151                                    gpe_resource,
2152                                    1,
2153                                    I965_SURFACEFORMAT_R8_UNORM,
2154                                    VP9_BTI_ME_MV_DATA_SURFACE);
2155
2156     if (param->b16xme_enabled) {
2157         gpe_resource = param->pres_16x_memv_data_buffer;
2158         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2159                                        gpe_resource,
2160                                        1,
2161                                        I965_SURFACEFORMAT_R8_UNORM,
2162                                        VP9_BTI_16XME_MV_DATA_SURFACE);
2163     }
2164
2165     if (!param->use_16x_me) {
2166         gpe_resource = param->pres_me_brc_distortion_buffer;
2167
2168         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2169                                        gpe_resource,
2170                                        1,
2171                                        I965_SURFACEFORMAT_R8_UNORM,
2172                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2173
2174         gpe_resource = param->pres_me_distortion_buffer;
2175
2176         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2177                                        gpe_resource,
2178                                        1,
2179                                        I965_SURFACEFORMAT_R8_UNORM,
2180                                        VP9_BTI_ME_DISTORTION_SURFACE);
2181     }
2182
2183     if (param->use_16x_me)
2184         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2185     else
2186         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2187
2188     gen9_add_adv_gpe_surface(ctx, gpe_context,
2189                              input_surface,
2190                              VP9_BTI_ME_CURR_PIC_L0);
2191
2192     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2193
2194
2195     if (param->last_ref_pic) {
2196         obj_surface = param->last_ref_pic;
2197         vp9_priv_surface = obj_surface->private_data;
2198
2199         if (param->use_16x_me)
2200             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2201         else
2202             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2203
2204         if (param->dys_enabled &&
2205             ((vp9_priv_surface->frame_width != param->frame_width) ||
2206              (vp9_priv_surface->frame_height != param->frame_height))) {
2207             if (param->use_16x_me)
2208                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2209             else
2210                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2211         }
2212         gen9_add_adv_gpe_surface(ctx, gpe_context,
2213                                  input_surface,
2214                                  ref_bti);
2215         gen9_add_adv_gpe_surface(ctx, gpe_context,
2216                                  input_surface,
2217                                  ref_bti + 1);
2218         ref_bti += 2;
2219     }
2220
2221     if (param->golden_ref_pic) {
2222         obj_surface = param->golden_ref_pic;
2223         vp9_priv_surface = obj_surface->private_data;
2224
2225         if (param->use_16x_me)
2226             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2227         else
2228             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2229
2230         if (param->dys_enabled &&
2231             ((vp9_priv_surface->frame_width != param->frame_width) ||
2232              (vp9_priv_surface->frame_height != param->frame_height))) {
2233             if (param->use_16x_me)
2234                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2235             else
2236                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2237         }
2238
2239         gen9_add_adv_gpe_surface(ctx, gpe_context,
2240                                  input_surface,
2241                                  ref_bti);
2242         gen9_add_adv_gpe_surface(ctx, gpe_context,
2243                                  input_surface,
2244                                  ref_bti + 1);
2245         ref_bti += 2;
2246     }
2247
2248     if (param->alt_ref_pic) {
2249         obj_surface = param->alt_ref_pic;
2250         vp9_priv_surface = obj_surface->private_data;
2251
2252         if (param->use_16x_me)
2253             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2254         else
2255             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2256
2257         if (param->dys_enabled &&
2258             ((vp9_priv_surface->frame_width != param->frame_width) ||
2259              (vp9_priv_surface->frame_height != param->frame_height))) {
2260             if (param->use_16x_me)
2261                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2262             else
2263                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2264         }
2265         gen9_add_adv_gpe_surface(ctx, gpe_context,
2266                                  input_surface,
2267                                  ref_bti);
2268         gen9_add_adv_gpe_surface(ctx, gpe_context,
2269                                  input_surface,
2270                                  ref_bti + 1);
2271         ref_bti += 2;
2272     }
2273
2274     return;
2275 }
2276
2277 static
2278 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2279                               struct encode_state *encode_state,
2280                               struct intel_encoder_context *encoder_context,
2281                               struct i965_gpe_context *gpe_context,
2282                               int use_16x_me)
2283 {
2284     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2285     struct gen9_vp9_me_surface_param  me_surface_param;
2286     struct gen9_vp9_state *vp9_state;
2287
2288     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2289
2290     /* sScaled4xSurface surface */
2291     memset(&me_surface_param, 0, sizeof(me_surface_param));
2292     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2293     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2294     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2295     me_surface_param.curr_pic = vp9_state->curr_frame;
2296     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2297     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2298     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2299     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2300
2301     if (use_16x_me) {
2302         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2303         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2304     } else {
2305         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2306         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2307     }
2308     me_surface_param.frame_width  = vp9_state->frame_width;
2309     me_surface_param.frame_height  = vp9_state->frame_height;
2310
2311     me_surface_param.use_16x_me = use_16x_me;
2312     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2313     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2314
2315     vme_context->pfn_send_me_surface(ctx, encode_state,
2316                                      gpe_context,
2317                                      encoder_context,
2318                                      &me_surface_param);
2319     return;
2320 }
2321
2322 static VAStatus
2323 gen9_vp9_me_kernel(VADriverContextP ctx,
2324                    struct encode_state *encode_state,
2325                    struct intel_encoder_context *encoder_context,
2326                    int use_16x_me)
2327 {
2328     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2329     struct i965_gpe_context *gpe_context;
2330     int media_function;
2331     struct gen9_vp9_me_curbe_param me_curbe_param;
2332     struct gen9_vp9_state *vp9_state;
2333     struct gpe_media_object_walker_parameter media_object_walker_param;
2334     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2335
2336     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2337     if (!vp9_state || !vp9_state->pic_param)
2338         return VA_STATUS_ERROR_INVALID_PARAMETER;
2339
2340     if (use_16x_me)
2341         media_function = VP9_MEDIA_STATE_16X_ME;
2342     else
2343         media_function = VP9_MEDIA_STATE_4X_ME;
2344
2345     gpe_context = &(vme_context->me_context.gpe_context);
2346
2347     gen8_gpe_context_init(ctx, gpe_context);
2348     gen9_gpe_reset_binding_table(ctx, gpe_context);
2349
2350     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2351     me_curbe_param.ppic_param = vp9_state->pic_param;
2352     me_curbe_param.pseq_param = vp9_state->seq_param;
2353     me_curbe_param.frame_width = vp9_state->frame_width;
2354     me_curbe_param.frame_height = vp9_state->frame_height;
2355     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2356     me_curbe_param.use_16x_me = use_16x_me;
2357     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2358     vme_context->pfn_set_curbe_me(ctx, encode_state,
2359                                   gpe_context,
2360                                   encoder_context,
2361                                   &me_curbe_param);
2362
2363     gen9_me_add_surfaces_vp9(ctx, encode_state,
2364                              encoder_context,
2365                              gpe_context,
2366                              use_16x_me);
2367
2368     gen8_gpe_setup_interface_data(ctx, gpe_context);
2369
2370     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2371     if (use_16x_me) {
2372         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2373         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2374     } else {
2375         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2376         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2377     }
2378     kernel_walker_param.no_dependency = 1;
2379
2380     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2381
2382     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2383                                         gpe_context,
2384                                         media_function,
2385                                         &media_object_walker_param);
2386
2387     return VA_STATUS_SUCCESS;
2388 }
2389
2390 static void
2391 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2392                             struct encode_state *encode_state,
2393                             struct i965_gpe_context *gpe_context,
2394                             struct intel_encoder_context *encoder_context,
2395                             struct gen9_vp9_scaling_curbe_param *curbe_param)
2396 {
2397     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2398
2399     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2400
2401     if (!curbe_cmd)
2402         return;
2403
2404     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2405
2406     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2407     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2408
2409     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2410     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2411
2412
2413     curbe_cmd->dw6.enable_mb_variance_output = 0;
2414     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2415     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2416
2417     if (curbe_param->mb_variance_output_enabled ||
2418         curbe_param->mb_pixel_average_output_enabled)
2419     {
2420         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2421     }
2422
2423     i965_gpe_context_unmap_curbe(gpe_context);
2424     return;
2425 }
2426
2427 static void
2428 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2429                               struct encode_state *encode_state,
2430                               struct i965_gpe_context *gpe_context,
2431                               struct intel_encoder_context *encoder_context,
2432                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2433 {
2434     vp9_bti_scaling_offset *scaling_bti;
2435     unsigned int surface_format;
2436
2437     scaling_bti = scaling_surface_param->p_scaling_bti;
2438
2439     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2440         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2441     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2442         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2443     else
2444         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2445
2446     gen9_add_2d_gpe_surface(ctx, gpe_context,
2447                             scaling_surface_param->input_surface,
2448                             0, 1, surface_format,
2449                             scaling_bti->scaling_frame_src_y);
2450
2451     gen9_add_2d_gpe_surface(ctx, gpe_context,
2452                             scaling_surface_param->output_surface,
2453                             0, 1, surface_format,
2454                             scaling_bti->scaling_frame_dst_y);
2455
2456
2457     return;
2458 }
2459
2460 static VAStatus
2461 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2462                         struct encode_state *encode_state,
2463                         struct intel_encoder_context *encoder_context,
2464                         int use_16x_scaling)
2465 {
2466     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2467     struct i965_gpe_context *gpe_context;
2468     int media_function;
2469     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2470     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2471     struct gen9_vp9_state *vp9_state;
2472     VAEncPictureParameterBufferVP9  *pic_param;
2473     struct gpe_media_object_walker_parameter media_object_walker_param;
2474     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2475     struct object_surface *obj_surface;
2476     struct object_surface *input_surface, *output_surface;
2477     struct gen9_surface_vp9 *vp9_priv_surface;
2478     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2479     unsigned int input_frame_width, input_frame_height;
2480     unsigned int output_frame_width, output_frame_height;
2481
2482     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2483     if (!vp9_state || !vp9_state->pic_param)
2484         return VA_STATUS_ERROR_INVALID_PARAMETER;
2485
2486     pic_param = vp9_state->pic_param;
2487
2488     if (use_16x_scaling)
2489         media_function = VP9_MEDIA_STATE_16X_SCALING;
2490     else
2491         media_function = VP9_MEDIA_STATE_4X_SCALING;
2492
2493     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2494
2495     gen8_gpe_context_init(ctx, gpe_context);
2496     gen9_gpe_reset_binding_table(ctx, gpe_context);
2497
2498     obj_surface = encode_state->reconstructed_object;
2499     vp9_priv_surface = obj_surface->private_data;
2500
2501     if (use_16x_scaling)
2502     {
2503         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2504         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2505
2506         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2507         input_frame_width           = vp9_state->frame_width_4x;
2508         input_frame_height          = vp9_state->frame_height_4x;
2509
2510         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2511         output_frame_width          = vp9_state->frame_width_16x;
2512         output_frame_height         = vp9_state->frame_height_16x;
2513     } else {
2514         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2515         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2516
2517         if (vp9_state->dys_in_use &&
2518                ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2519                 (pic_param->frame_height_src != pic_param->frame_height_dst)))
2520             input_surface               = vp9_priv_surface->dys_surface_obj;
2521         else
2522             input_surface               = encode_state->input_yuv_object;
2523
2524         input_frame_width           = vp9_state->frame_width;
2525         input_frame_height          = vp9_state->frame_height;
2526
2527         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2528         output_frame_width          = vp9_state->frame_width_4x;
2529         output_frame_height         = vp9_state->frame_height_4x;
2530     }
2531
2532     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2533
2534     scaling_curbe_param.input_picture_width  = input_frame_width;
2535     scaling_curbe_param.input_picture_height = input_frame_height;
2536
2537     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2538     scaling_curbe_param.use_32x_scaling = 0;
2539
2540     if (use_16x_scaling)
2541         scaling_curbe_param.mb_variance_output_enabled = 0;
2542     else
2543         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2544
2545     scaling_curbe_param.blk8x8_stat_enabled = 0;
2546
2547     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2548                                   gpe_context,
2549                                   encoder_context,
2550                                   &scaling_curbe_param);
2551
2552     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2553     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2554     scaling_surface_param.input_surface                      = input_surface;
2555     scaling_surface_param.input_frame_width                  = input_frame_width;
2556     scaling_surface_param.input_frame_height                 = input_frame_height;
2557
2558     scaling_surface_param.output_surface                     = output_surface;
2559     scaling_surface_param.output_frame_width                 = output_frame_width;
2560     scaling_surface_param.output_frame_height                = output_frame_height;
2561     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2562     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2563
2564     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2565                                           gpe_context,
2566                                           encoder_context,
2567                                           &scaling_surface_param);
2568
2569     gen8_gpe_setup_interface_data(ctx, gpe_context);
2570
2571     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2572     /* the scaling is based on 8x8 blk level */
2573     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2574     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2575     kernel_walker_param.no_dependency = 1;
2576
2577     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2578
2579     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2580                                         gpe_context,
2581                                         media_function,
2582                                         &media_object_walker_param);
2583
2584     return VA_STATUS_SUCCESS;
2585 }
2586
2587 static void
2588 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2589 {
2590     struct gen9_sampler_8x8_avs                *sampler_cmd;
2591
2592     if (!gpe_context)
2593         return;
2594
2595     dri_bo_map(gpe_context->sampler.bo, 1);
2596
2597     if (!gpe_context->sampler.bo->virtual)
2598         return;
2599
2600     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2601        (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2602
2603     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2604
2605     sampler_cmd->dw0.r3c_coefficient                      = 15;
2606     sampler_cmd->dw0.r3x_coefficient                      = 6;
2607     sampler_cmd->dw0.strong_edge_threshold                = 8;
2608     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2609     sampler_cmd->dw0.gain_factor                          = 32;
2610
2611     sampler_cmd->dw2.r5c_coefficient                     = 3;
2612     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2613     sampler_cmd->dw2.r5x_coefficient                     = 9;
2614     sampler_cmd->dw2.strong_edge_weight                  = 6;
2615     sampler_cmd->dw2.regular_weight                      = 3;
2616     sampler_cmd->dw2.non_edge_weight                     = 2;
2617     sampler_cmd->dw2.global_noise_estimation             = 255;
2618
2619     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2620     sampler_cmd->dw3.cos_alpha                           = 79;
2621     sampler_cmd->dw3.sin_alpha                           = 101;
2622
2623     sampler_cmd->dw5.diamond_du                           = 0;
2624     sampler_cmd->dw5.hs_margin                            = 3;
2625     sampler_cmd->dw5.diamond_alpha                        = 100;
2626
2627     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2628
2629     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2630
2631     sampler_cmd->dw10.y_slope2                            = 24;
2632     sampler_cmd->dw10.s0l                                 = 1792;
2633
2634     sampler_cmd->dw12.y_slope1                            = 24;
2635
2636     sampler_cmd->dw14.s0u                                = 256;
2637
2638     sampler_cmd->dw15.s2u                                = 1792;
2639     sampler_cmd->dw15.s1u                                = 0;
2640
2641     memcpy(sampler_cmd->coefficients,
2642            &gen9_vp9_avs_coeffs[0],
2643            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2644
2645     sampler_cmd->dw152.default_sharpness_level     = 255;
2646     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2647     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2648     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2649     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2650
2651     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2652     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2653     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2654
2655     memcpy(sampler_cmd->extra_coefficients,
2656            &gen9_vp9_avs_coeffs[17 * 8],
2657            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2658
2659     dri_bo_unmap(gpe_context->sampler.bo);
2660 }
2661
2662 static void
2663 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2664                        struct encode_state *encode_state,
2665                        struct i965_gpe_context *gpe_context,
2666                        struct intel_encoder_context *encoder_context,
2667                        struct gen9_vp9_dys_curbe_param *curbe_param)
2668 {
2669     vp9_dys_curbe_data  *curbe_cmd;
2670
2671     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2672
2673     if (!curbe_cmd)
2674         return;
2675
2676     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2677
2678     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2679     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2680
2681     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2682     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2683
2684     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2685     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2686
2687     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2688     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2689     curbe_cmd->dw18.avs_sample_idx            = 0;
2690
2691     i965_gpe_context_unmap_curbe(gpe_context);
2692 }
2693
2694 static void
2695 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2696                        struct encode_state *encode_state,
2697                        struct i965_gpe_context *gpe_context,
2698                        struct intel_encoder_context *encoder_context,
2699                        struct gen9_vp9_dys_surface_param *surface_param)
2700 {
2701
2702     if (surface_param->input_frame)
2703         gen9_add_adv_gpe_surface(ctx,
2704                                  gpe_context,
2705                                  surface_param->input_frame,
2706                                  VP9_BTI_DYS_INPUT_NV12);
2707
2708     if (surface_param->output_frame) {
2709         gen9_add_2d_gpe_surface(ctx,
2710                                 gpe_context,
2711                                 surface_param->output_frame,
2712                                 0,
2713                                 1,
2714                                 I965_SURFACEFORMAT_R8_UNORM,
2715                                 VP9_BTI_DYS_OUTPUT_Y);
2716
2717         gen9_add_2d_gpe_surface(ctx,
2718                                 gpe_context,
2719                                 surface_param->output_frame,
2720                                 1,
2721                                 1,
2722                                 I965_SURFACEFORMAT_R16_UINT,
2723                                 VP9_BTI_DYS_OUTPUT_UV);
2724     }
2725
2726     return;
2727 }
2728
2729 static VAStatus
2730 gen9_vp9_dys_kernel(VADriverContextP ctx,
2731                         struct encode_state *encode_state,
2732                         struct intel_encoder_context *encoder_context,
2733                         gen9_vp9_dys_kernel_param *dys_kernel_param)
2734 {
2735     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2736     struct i965_gpe_context *gpe_context;
2737     int media_function;
2738     struct gen9_vp9_dys_curbe_param                 curbe_param;
2739     struct gen9_vp9_dys_surface_param               surface_param;
2740     struct gpe_media_object_walker_parameter        media_object_walker_param;
2741     struct vp9_encoder_kernel_walker_parameter      kernel_walker_param;
2742     unsigned int                                    resolution_x, resolution_y;
2743
2744     media_function = VP9_MEDIA_STATE_DYS;
2745     gpe_context = &vme_context->dys_context.gpe_context;
2746
2747     //gen8_gpe_context_init(ctx, gpe_context);
2748     gen9_gpe_reset_binding_table(ctx, gpe_context);
2749
2750     /* sampler state is configured only when initializing the GPE context */
2751
2752     memset(&curbe_param, 0, sizeof(curbe_param));
2753     curbe_param.input_width   = dys_kernel_param->input_width;
2754     curbe_param.input_height  = dys_kernel_param->input_height;
2755     curbe_param.output_width = dys_kernel_param->output_width;
2756     curbe_param.output_height = dys_kernel_param->output_height;
2757     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2758                                   gpe_context,
2759                                   encoder_context,
2760                                   &curbe_param);
2761
2762     // Add surface states
2763     memset(&surface_param, 0, sizeof(surface_param));
2764     surface_param.input_frame = dys_kernel_param->input_surface;
2765     surface_param.output_frame = dys_kernel_param->output_surface;
2766     surface_param.vert_line_stride = 0;
2767     surface_param.vert_line_stride_offset = 0;
2768
2769     vme_context->pfn_send_dys_surface(ctx,
2770                                       encode_state,
2771                                       gpe_context,
2772                                       encoder_context,
2773                                       &surface_param);
2774
2775     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2776     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2777
2778     gen8_gpe_setup_interface_data(ctx, gpe_context);
2779
2780     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2781     kernel_walker_param.resolution_x = resolution_x;
2782     kernel_walker_param.resolution_y = resolution_y;
2783     kernel_walker_param.no_dependency = 1;
2784
2785     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2786
2787     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2788                                         gpe_context,
2789                                         media_function,
2790                                         &media_object_walker_param);
2791
2792     return VA_STATUS_SUCCESS;
2793 }
2794
2795 static VAStatus
2796 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2797                           struct encode_state *encode_state,
2798                           struct intel_encoder_context *encoder_context)
2799 {
2800     struct gen9_vp9_state *vp9_state;
2801     VAEncPictureParameterBufferVP9  *pic_param;
2802     gen9_vp9_dys_kernel_param dys_kernel_param;
2803     struct object_surface *obj_surface;
2804     struct object_surface *input_surface, *output_surface;
2805     struct gen9_surface_vp9 *vp9_priv_surface;
2806
2807     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2808
2809     if (!vp9_state || !vp9_state->pic_param)
2810         return VA_STATUS_ERROR_INVALID_PARAMETER;
2811
2812     pic_param = vp9_state->pic_param;
2813
2814     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2815         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2816         input_surface = encode_state->input_yuv_object;
2817         obj_surface = encode_state->reconstructed_object;
2818         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2819         output_surface = vp9_priv_surface->dys_surface_obj;
2820
2821         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2822         dys_kernel_param.input_width = pic_param->frame_width_src;
2823         dys_kernel_param.input_height = pic_param->frame_height_src;
2824         dys_kernel_param.input_surface = input_surface;
2825         dys_kernel_param.output_width = pic_param->frame_width_dst;
2826         dys_kernel_param.output_height = pic_param->frame_height_dst;
2827         dys_kernel_param.output_surface = output_surface;
2828         gen9_vp9_dys_kernel(ctx, encode_state,
2829                             encoder_context,
2830                             &dys_kernel_param);
2831     }
2832
2833     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2834          vp9_state->last_ref_obj) {
2835         obj_surface = vp9_state->last_ref_obj;
2836         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2837
2838         input_surface = obj_surface;
2839         output_surface = vp9_priv_surface->dys_surface_obj;
2840
2841         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2842         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2843         dys_kernel_param.input_surface = input_surface;
2844
2845         dys_kernel_param.output_width = pic_param->frame_width_dst;
2846         dys_kernel_param.output_height = pic_param->frame_height_dst;
2847         dys_kernel_param.output_surface = output_surface;
2848
2849         gen9_vp9_dys_kernel(ctx, encode_state,
2850                             encoder_context,
2851                             &dys_kernel_param);
2852
2853         if (vp9_state->hme_enabled) {
2854             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2855             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2856             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2857
2858             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2859             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2860             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2861
2862             gen9_vp9_dys_kernel(ctx, encode_state,
2863                                 encoder_context,
2864                                 &dys_kernel_param);
2865
2866             /* Does it really need to do the 16x HME if the
2867              * resolution is different?
2868              * Maybe it should be restricted
2869              */
2870             if (vp9_state->b16xme_enabled) {
2871                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2872                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2873                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2874
2875                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2876                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2877                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2878
2879                 gen9_vp9_dys_kernel(ctx, encode_state,
2880                                     encoder_context,
2881                                     &dys_kernel_param);
2882             }
2883         }
2884     }
2885
2886     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2887          vp9_state->golden_ref_obj) {
2888         obj_surface = vp9_state->golden_ref_obj;
2889         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2890
2891         input_surface = obj_surface;
2892         output_surface = vp9_priv_surface->dys_surface_obj;
2893
2894         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2895         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2896         dys_kernel_param.input_surface = input_surface;
2897
2898         dys_kernel_param.output_width = pic_param->frame_width_dst;
2899         dys_kernel_param.output_height = pic_param->frame_height_dst;
2900         dys_kernel_param.output_surface = output_surface;
2901
2902         gen9_vp9_dys_kernel(ctx, encode_state,
2903                             encoder_context,
2904                             &dys_kernel_param);
2905
2906         if (vp9_state->hme_enabled) {
2907             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2908             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2909             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2910
2911             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2912             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2913             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2914
2915             gen9_vp9_dys_kernel(ctx, encode_state,
2916                                 encoder_context,
2917                                 &dys_kernel_param);
2918
2919             /* Does it really need to do the 16x HME if the
2920              * resolution is different?
2921              * Maybe it should be restricted
2922              */
2923             if (vp9_state->b16xme_enabled) {
2924                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2925                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2926                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2927
2928                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2929                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2930                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2931
2932                 gen9_vp9_dys_kernel(ctx, encode_state,
2933                                     encoder_context,
2934                                     &dys_kernel_param);
2935             }
2936         }
2937     }
2938
2939     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2940          vp9_state->alt_ref_obj) {
2941         obj_surface = vp9_state->alt_ref_obj;
2942         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2943
2944         input_surface = obj_surface;
2945         output_surface = vp9_priv_surface->dys_surface_obj;
2946
2947         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2948         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2949         dys_kernel_param.input_surface = input_surface;
2950
2951         dys_kernel_param.output_width = pic_param->frame_width_dst;
2952         dys_kernel_param.output_height = pic_param->frame_height_dst;
2953         dys_kernel_param.output_surface = output_surface;
2954
2955         gen9_vp9_dys_kernel(ctx, encode_state,
2956                             encoder_context,
2957                             &dys_kernel_param);
2958
2959         if (vp9_state->hme_enabled) {
2960             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2961             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2962             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2963
2964             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2965             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2966             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2967
2968             gen9_vp9_dys_kernel(ctx, encode_state,
2969                                 encoder_context,
2970                                 &dys_kernel_param);
2971
2972             /* Does it really need to do the 16x HME if the
2973              * resolution is different?
2974              * Maybe it should be restricted
2975              */
2976             if (vp9_state->b16xme_enabled) {
2977                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2978                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2979                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2980
2981                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2982                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2983                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2984
2985                 gen9_vp9_dys_kernel(ctx, encode_state,
2986                                     encoder_context,
2987                                     &dys_kernel_param);
2988             }
2989         }
2990     }
2991
2992     return VA_STATUS_SUCCESS;
2993 }
2994
2995 static void
2996 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2997                          struct encode_state *encode_state,
2998                          struct i965_gpe_context *gpe_context,
2999                          struct intel_encoder_context *encoder_context,
3000                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
3001 {
3002     struct gen9_vp9_state *vp9_state;
3003     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
3004     vp9_mbenc_curbe_data  *curbe_cmd;
3005     VAEncPictureParameterBufferVP9  *pic_param;
3006     int i, segment_count;
3007     int seg_qindex;
3008     struct object_surface *obj_surface;
3009     struct gen9_surface_vp9 *vp9_priv_surface;
3010
3011     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3012
3013     if (!vp9_state || !vp9_state->pic_param)
3014         return;
3015
3016     pic_param = curbe_param->ppic_param;
3017     seg_param = curbe_param->psegment_param;
3018
3019     if (!seg_param) {
3020         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
3021         seg_param = &tmp_seg_param;
3022     }
3023
3024     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
3025
3026     if (!curbe_cmd)
3027         return;
3028
3029     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
3030
3031     if (vp9_state->dys_in_use)
3032     {
3033         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
3034         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
3035     }
3036     else
3037     {
3038         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
3039         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
3040     }
3041
3042     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
3043
3044     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
3045     if (pic_param->pic_flags.bits.segmentation_enabled)
3046         segment_count = 8;
3047     else
3048         segment_count = 1;
3049
3050     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
3051
3052     //right now set them to normal settings
3053     if (curbe_param->picture_coding_type)
3054     {
3055         switch (vp9_state->target_usage)
3056         {
3057         case INTEL_ENC_VP9_TU_QUALITY:
3058             curbe_cmd->dw1.min_16for32_check    = 0x00;
3059             curbe_cmd->dw2.multi_pred           = 0x02;
3060             curbe_cmd->dw2.len_sp               = 0x39;
3061             curbe_cmd->dw2.search_x             = 0x30;
3062             curbe_cmd->dw2.search_y             = 0x28;
3063             curbe_cmd->dw3.min_ref_for32_check = 0x01;
3064             curbe_cmd->dw4.skip16_threshold     = 0x000A;
3065             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
3066
3067             memcpy(&curbe_cmd->dw16,
3068                     vp9_diamond_ime_search_path_delta,
3069                     14 * sizeof(unsigned int));
3070             break;
3071         case INTEL_ENC_VP9_TU_PERFORMANCE:
3072             curbe_cmd->dw1.min_16for32_check    = 0x02;
3073             curbe_cmd->dw2.multi_pred           = 0x00;
3074             curbe_cmd->dw2.len_sp               = 0x10;
3075             curbe_cmd->dw2.search_x             = 0x20;
3076             curbe_cmd->dw2.search_y             = 0x20;
3077             curbe_cmd->dw3.min_ref_for32_check = 0x03;
3078             curbe_cmd->dw4.skip16_threshold     = 0x0014;
3079             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
3080
3081             memcpy(&curbe_cmd->dw16,
3082                     vp9_fullspiral_ime_search_path_delta,
3083                     14 * sizeof(unsigned int));
3084
3085             break;
3086         default:  // normal settings
3087             curbe_cmd->dw1.min_16for32_check     = 0x01;
3088             curbe_cmd->dw2.multi_pred           = 0x00;
3089             curbe_cmd->dw2.len_sp               = 0x19;
3090             curbe_cmd->dw2.search_x             = 0x30;
3091             curbe_cmd->dw2.search_y             = 0x28;
3092             curbe_cmd->dw3.min_ref_for32_check = 0x02;
3093             curbe_cmd->dw4.skip16_threshold     = 0x000F;
3094             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
3095
3096             memcpy(&curbe_cmd->dw16,
3097                     vp9_diamond_ime_search_path_delta,
3098                     14 * sizeof(unsigned int));
3099             break;
3100         }
3101
3102         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
3103         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
3104         // co-located predictor must be disabled when dynamic scaling is enabled
3105         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
3106     }
3107
3108     curbe_cmd->dw5.inter_round = 0;
3109     curbe_cmd->dw5.intra_round = 4;
3110     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
3111
3112     for (i = 0; i < segment_count; i++)
3113     {
3114         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
3115                      + seg_param->seg_data[i].segment_qindex_delta;
3116
3117         seg_qindex = CLAMP(0, 255, seg_qindex);
3118
3119         if (curbe_param->picture_coding_type)
3120             memcpy(&curbe_cmd->segments[i],
3121                    &intel_vp9_costlut_p[seg_qindex * 16],
3122                    16 * sizeof(unsigned int));
3123         else
3124             memcpy(&curbe_cmd->segments[i],
3125                    &intel_vp9_costlut_key[seg_qindex * 16],
3126                    16 * sizeof(unsigned int));
3127     }
3128
3129     if (curbe_param->picture_coding_type)
3130     {
3131         if (curbe_cmd->dw3.multi_ref_qp_check)
3132         {
3133             if (curbe_param->ref_frame_flag & 0x01)
3134             {
3135                 obj_surface = curbe_param->last_ref_obj;
3136                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3137                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3138             }
3139
3140             if (curbe_param->ref_frame_flag & 0x02)
3141             {
3142                 obj_surface = curbe_param->golden_ref_obj;
3143                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3144                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3145             }
3146
3147             if (curbe_param->ref_frame_flag & 0x04)
3148             {
3149                 obj_surface = curbe_param->alt_ref_obj;
3150                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3151                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3152             }
3153         }
3154     }
3155     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
3156     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
3157     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
3158     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3159     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
3160     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
3161     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
3162     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
3163     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3164     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
3165     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
3166     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
3167
3168     i965_gpe_context_unmap_curbe(gpe_context);
3169     return;
3170 }
3171
3172 static void
3173 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
3174                             struct encode_state *encode_state,
3175                             struct i965_gpe_context *gpe_context,
3176                             struct intel_encoder_context *encoder_context,
3177                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
3178 {
3179     struct gen9_vp9_state *vp9_state;
3180     unsigned int            res_size;
3181     unsigned int            frame_width_in_sb, frame_height_in_sb;
3182     struct object_surface   *obj_surface, *tmp_input;
3183     struct gen9_surface_vp9 *vp9_priv_surface;
3184     int media_function;
3185
3186     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3187
3188     if (!vp9_state || !vp9_state->pic_param)
3189         return;
3190
3191     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3192     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3193     media_function = mbenc_param->media_state_type;
3194
3195     switch (media_function)
3196     {
3197     case VP9_MEDIA_STATE_MBENC_I_32x32:
3198     {
3199         obj_surface = mbenc_param->curr_frame_obj;
3200
3201         gen9_add_2d_gpe_surface(ctx,
3202                                 gpe_context,
3203                                 obj_surface,
3204                                 0,
3205                                 1,
3206                                 I965_SURFACEFORMAT_R8_UNORM,
3207                                 VP9_BTI_MBENC_CURR_Y_G9);
3208
3209         gen9_add_2d_gpe_surface(ctx,
3210                                 gpe_context,
3211                                 obj_surface,
3212                                 1,
3213                                 1,
3214                                 I965_SURFACEFORMAT_R16_UINT,
3215                                 VP9_BTI_MBENC_CURR_UV_G9);
3216
3217
3218         if (mbenc_param->segmentation_enabled)
3219         {
3220            gen9_add_buffer_2d_gpe_surface(ctx,
3221                                    gpe_context,
3222                                    mbenc_param->pres_segmentation_map,
3223                                    1,
3224                                    I965_SURFACEFORMAT_R8_UNORM,
3225                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3226
3227         }
3228
3229         res_size = 16 * mbenc_param->frame_width_in_mb *
3230                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3231         gen9_add_buffer_gpe_surface(ctx,
3232                                     gpe_context,
3233                                     mbenc_param->pres_mode_decision,
3234                                     0,
3235                                     res_size / 4,
3236                                     0,
3237                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3238
3239         break;
3240     }
3241     case VP9_MEDIA_STATE_MBENC_I_16x16:
3242     {
3243         obj_surface = mbenc_param->curr_frame_obj;
3244
3245         gen9_add_2d_gpe_surface(ctx,
3246                                 gpe_context,
3247                                 obj_surface,
3248                                 0,
3249                                 1,
3250                                 I965_SURFACEFORMAT_R8_UNORM,
3251                                 VP9_BTI_MBENC_CURR_Y_G9);
3252
3253         gen9_add_2d_gpe_surface(ctx,
3254                                 gpe_context,
3255                                 obj_surface,
3256                                 1,
3257                                 1,
3258                                 I965_SURFACEFORMAT_R16_UINT,
3259                                 VP9_BTI_MBENC_CURR_UV_G9);
3260
3261         gen9_add_adv_gpe_surface(ctx, gpe_context,
3262                                  obj_surface,
3263                                  VP9_BTI_MBENC_CURR_NV12_G9);
3264
3265         if (mbenc_param->segmentation_enabled)
3266         {
3267            gen9_add_buffer_2d_gpe_surface(ctx,
3268                                    gpe_context,
3269                                    mbenc_param->pres_segmentation_map,
3270                                    1,
3271                                    I965_SURFACEFORMAT_R8_UNORM,
3272                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3273
3274         }
3275
3276         res_size = 16 * mbenc_param->frame_width_in_mb *
3277                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3278         gen9_add_buffer_gpe_surface(ctx,
3279                                     gpe_context,
3280                                     mbenc_param->pres_mode_decision,
3281                                     0,
3282                                     res_size / 4,
3283                                     0,
3284                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3285
3286         res_size = 160;
3287
3288         gen9_add_dri_buffer_gpe_surface(ctx,
3289                                         gpe_context,
3290                                         mbenc_param->gpe_context_tx->curbe.bo,
3291                                         0,
3292                                         ALIGN(res_size, 64),
3293                                         mbenc_param->gpe_context_tx->curbe.offset,
3294                                         VP9_BTI_MBENC_TX_CURBE_G9);
3295
3296         break;
3297     }
3298     case VP9_MEDIA_STATE_MBENC_P:
3299     {
3300         obj_surface = mbenc_param->curr_frame_obj;
3301
3302         gen9_add_2d_gpe_surface(ctx,
3303                                 gpe_context,
3304                                 obj_surface,
3305                                 0,
3306                                 1,
3307                                 I965_SURFACEFORMAT_R8_UNORM,
3308                                 VP9_BTI_MBENC_CURR_Y_G9);
3309
3310         gen9_add_2d_gpe_surface(ctx, gpe_context,
3311                                 obj_surface,
3312                                 1,
3313                                 1,
3314                                 I965_SURFACEFORMAT_R16_UINT,
3315                                 VP9_BTI_MBENC_CURR_UV_G9);
3316
3317         gen9_add_adv_gpe_surface(ctx, gpe_context,
3318                                  obj_surface,
3319                                  VP9_BTI_MBENC_CURR_NV12_G9);
3320
3321         if (mbenc_param->last_ref_obj)
3322         {
3323             obj_surface = mbenc_param->last_ref_obj;
3324             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3325
3326             if (vp9_state->dys_in_use &&
3327                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3328                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3329                 tmp_input = vp9_priv_surface->dys_surface_obj;
3330             else
3331                 tmp_input = obj_surface;
3332
3333             gen9_add_adv_gpe_surface(ctx, gpe_context,
3334                                  tmp_input,
3335                                  VP9_BTI_MBENC_LAST_NV12_G9);
3336
3337             gen9_add_adv_gpe_surface(ctx, gpe_context,
3338                                  tmp_input,
3339                                  VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3340
3341         }
3342
3343         if (mbenc_param->golden_ref_obj)
3344         {
3345             obj_surface = mbenc_param->golden_ref_obj;
3346             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3347
3348             if (vp9_state->dys_in_use &&
3349                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3350                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3351                 tmp_input = vp9_priv_surface->dys_surface_obj;
3352             else
3353                 tmp_input = obj_surface;
3354
3355             gen9_add_adv_gpe_surface(ctx, gpe_context,
3356                                  tmp_input,
3357                                  VP9_BTI_MBENC_GOLD_NV12_G9);
3358
3359             gen9_add_adv_gpe_surface(ctx, gpe_context,
3360                                  tmp_input,
3361                                  VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3362
3363         }
3364
3365         if (mbenc_param->alt_ref_obj)
3366         {
3367             obj_surface = mbenc_param->alt_ref_obj;
3368             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3369
3370             if (vp9_state->dys_in_use &&
3371                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3372                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3373                 tmp_input = vp9_priv_surface->dys_surface_obj;
3374             else
3375                 tmp_input = obj_surface;
3376
3377             gen9_add_adv_gpe_surface(ctx, gpe_context,
3378                                  tmp_input,
3379                                  VP9_BTI_MBENC_ALTREF_NV12_G9);
3380
3381             gen9_add_adv_gpe_surface(ctx, gpe_context,
3382                                  tmp_input,
3383                                  VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3384
3385         }
3386
3387         if (mbenc_param->hme_enabled)
3388         {
3389             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3390                                        mbenc_param->ps4x_memv_data_buffer,
3391                                        1,
3392                                        I965_SURFACEFORMAT_R8_UNORM,
3393                                        VP9_BTI_MBENC_HME_MV_DATA_G9);
3394
3395             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3396                                        mbenc_param->ps4x_memv_distortion_buffer,
3397                                        1,
3398                                        I965_SURFACEFORMAT_R8_UNORM,
3399                                        VP9_BTI_MBENC_HME_DISTORTION_G9);
3400         }
3401
3402         if (mbenc_param->segmentation_enabled)
3403         {
3404            gen9_add_buffer_2d_gpe_surface(ctx,
3405                                    gpe_context,
3406                                    mbenc_param->pres_segmentation_map,
3407                                    1,
3408                                    I965_SURFACEFORMAT_R8_UNORM,
3409                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3410
3411         }
3412
3413         res_size = 16 * mbenc_param->frame_width_in_mb *
3414                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3415         gen9_add_buffer_gpe_surface(ctx,
3416                                     gpe_context,
3417                                     mbenc_param->pres_mode_decision_prev,
3418                                     0,
3419                                     res_size / 4,
3420                                     0,
3421                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3422
3423         gen9_add_buffer_gpe_surface(ctx,
3424                                     gpe_context,
3425                                     mbenc_param->pres_mode_decision,
3426                                     0,
3427                                     res_size / 4,
3428                                     0,
3429                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3430
3431         gen9_add_buffer_2d_gpe_surface(ctx,
3432                                    gpe_context,
3433                                    mbenc_param->pres_output_16x16_inter_modes,
3434                                    1,
3435                                    I965_SURFACEFORMAT_R8_UNORM,
3436                                    VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3437
3438         res_size = 160;
3439
3440         gen9_add_dri_buffer_gpe_surface(ctx,
3441                                         gpe_context,
3442                                         mbenc_param->gpe_context_tx->curbe.bo,
3443                                         0,
3444                                         ALIGN(res_size, 64),
3445                                         mbenc_param->gpe_context_tx->curbe.offset,
3446                                         VP9_BTI_MBENC_TX_CURBE_G9);
3447
3448
3449         break;
3450     }
3451     case VP9_MEDIA_STATE_MBENC_TX:
3452     {
3453         obj_surface = mbenc_param->curr_frame_obj;
3454
3455         gen9_add_2d_gpe_surface(ctx,
3456                                 gpe_context,
3457                                 obj_surface,
3458                                 0,
3459                                 1,
3460                                 I965_SURFACEFORMAT_R8_UNORM,
3461                                 VP9_BTI_MBENC_CURR_Y_G9);
3462
3463         gen9_add_2d_gpe_surface(ctx,
3464                                 gpe_context,
3465                                 obj_surface,
3466                                 1,
3467                                 1,
3468                                 I965_SURFACEFORMAT_R16_UINT,
3469                                 VP9_BTI_MBENC_CURR_UV_G9);
3470
3471         if (mbenc_param->segmentation_enabled)
3472         {
3473            gen9_add_buffer_2d_gpe_surface(ctx,
3474                                    gpe_context,
3475                                    mbenc_param->pres_segmentation_map,
3476                                    1,
3477                                    I965_SURFACEFORMAT_R8_UNORM,
3478                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3479
3480         }
3481
3482         res_size = 16 * mbenc_param->frame_width_in_mb *
3483                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3484         gen9_add_buffer_gpe_surface(ctx,
3485                                     gpe_context,
3486                                     mbenc_param->pres_mode_decision,
3487                                     0,
3488                                     res_size / 4,
3489                                     0,
3490                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3491
3492         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3493         gen9_add_buffer_gpe_surface(ctx,
3494                                     gpe_context,
3495                                     mbenc_param->pres_mb_code_surface,
3496                                     0,
3497                                     res_size / 4,
3498                                     0,
3499                                     VP9_BTI_MBENC_PAK_DATA_G9);
3500
3501         // CU Record
3502         res_size = frame_width_in_sb * frame_height_in_sb *
3503                    64 * 16 * sizeof(unsigned int);
3504
3505         gen9_add_buffer_gpe_surface(ctx,
3506                                     gpe_context,
3507                                     mbenc_param->pres_mb_code_surface,
3508                                     0,
3509                                     res_size / 4,
3510                                     mbenc_param->mb_data_offset,
3511                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3512     }
3513     default:
3514         break;
3515     }
3516
3517     return;
3518 }
3519
3520 static VAStatus
3521 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3522                       struct encode_state *encode_state,
3523                       struct intel_encoder_context *encoder_context,
3524                       int media_function)
3525 {
3526     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3527     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3528     struct gpe_media_object_walker_parameter        media_object_walker_param;
3529     struct vp9_encoder_kernel_walker_parameter      kernel_walker_param;
3530     unsigned int    resolution_x, resolution_y;
3531     struct gen9_vp9_state *vp9_state;
3532     VAEncPictureParameterBufferVP9  *pic_param;
3533     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3534     struct gen9_vp9_mbenc_surface_param             surface_param;
3535     VAStatus    va_status = VA_STATUS_SUCCESS;
3536     int mbenc_gpe_index = 0;
3537     struct object_surface *obj_surface;
3538     struct gen9_surface_vp9 *vp9_priv_surface;
3539
3540     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3541
3542     if (!vp9_state || !vp9_state->pic_param)
3543         return VA_STATUS_ERROR_ENCODING_ERROR;
3544
3545     pic_param = vp9_state->pic_param;
3546
3547     switch (media_function)
3548     {
3549         case VP9_MEDIA_STATE_MBENC_I_32x32:
3550             mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3551             break;
3552
3553         case VP9_MEDIA_STATE_MBENC_I_16x16:
3554             mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3555             break;
3556
3557         case VP9_MEDIA_STATE_MBENC_P:
3558             mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3559             break;
3560
3561         case VP9_MEDIA_STATE_MBENC_TX:
3562             mbenc_gpe_index = VP9_MBENC_IDX_TX;
3563             break;
3564
3565         default:
3566             va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3567             return va_status;
3568     }
3569
3570     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3571     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3572
3573     gen9_gpe_reset_binding_table(ctx, gpe_context);
3574
3575     // Set curbe
3576     if (!vp9_state->mbenc_curbe_set_in_brc_update)
3577     {
3578         if(media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3579            media_function == VP9_MEDIA_STATE_MBENC_P)
3580         {
3581             memset(&curbe_param, 0, sizeof(curbe_param));
3582             curbe_param.ppic_param            = vp9_state->pic_param;
3583             curbe_param.pseq_param            = vp9_state->seq_param;
3584             curbe_param.psegment_param        = vp9_state->segment_param;
3585             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3586             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3587             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3588             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3589             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3590             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3591             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3592             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3593             curbe_param.media_state_type      = media_function;
3594             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3595
3596             vme_context->pfn_set_curbe_mbenc(ctx,
3597                                              encode_state,
3598                                              gpe_context,
3599                                              encoder_context,
3600                                              &curbe_param);
3601         }
3602     }
3603
3604     memset(&surface_param, 0, sizeof(surface_param));
3605     surface_param.media_state_type             = media_function;
3606     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3607     surface_param.frame_width                  = vp9_state->frame_width;
3608     surface_param.frame_height                 = vp9_state->frame_height;
3609     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3610     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3611     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3612     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3613     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3614     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3615     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3616     surface_param.pres_mode_decision           =
3617               &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3618     surface_param.pres_mode_decision_prev      =
3619               &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3620     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3621     surface_param.pres_mbenc_curbe_buffer      = NULL;
3622     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3623     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3624     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3625     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3626     surface_param.gpe_context_tx               = tx_gpe_context;
3627     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3628
3629     obj_surface = encode_state->reconstructed_object;
3630     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3631     if (vp9_state->dys_in_use &&
3632         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3633          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3634         obj_surface = vp9_priv_surface->dys_surface_obj;
3635     } else
3636         obj_surface = encode_state->input_yuv_object;
3637
3638     surface_param.curr_frame_obj             = obj_surface;
3639
3640     vme_context->pfn_send_mbenc_surface(ctx,
3641                                         encode_state,
3642                                         gpe_context,
3643                                         encoder_context,
3644                                         &surface_param);
3645
3646     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3647         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3648         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3649     } else {
3650         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3651         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3652     }
3653
3654     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3655     kernel_walker_param.resolution_x = resolution_x;
3656     kernel_walker_param.resolution_y = resolution_y;
3657
3658     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3659         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3660         kernel_walker_param.use_scoreboard = 1;
3661         kernel_walker_param.no_dependency = 0;
3662         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3663     } else {
3664         kernel_walker_param.use_scoreboard = 0;
3665         kernel_walker_param.no_dependency = 1;
3666     }
3667
3668     gen8_gpe_setup_interface_data(ctx, gpe_context);
3669
3670     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3671
3672     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3673                                         gpe_context,
3674                                         media_function,
3675                                         &media_object_walker_param);
3676     return va_status;
3677 }
3678
3679 static void
3680 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3681                           struct i965_gpe_context *gpe_context,
3682                           struct vp9_encoder_kernel_parameter *kernel_param)
3683 {
3684     struct i965_driver_data *i965 = i965_driver_data(ctx);
3685
3686     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3687
3688     gpe_context->sampler.entry_size = 0;
3689     gpe_context->sampler.max_entries = 0;
3690
3691     if (kernel_param->sampler_size) {
3692         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3693         gpe_context->sampler.max_entries = 1;
3694     }
3695
3696     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3697     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3698
3699     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3700     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3701     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3702     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3703
3704     if (i965->intel.eu_total > 0)
3705         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3706     else
3707         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3708
3709     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3710     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3711     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3712                                               gpe_context->vfe_state.curbe_allocation_size -
3713                                               ((gpe_context->idrt.entry_size >> 5) *
3714                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3715     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3716     gpe_context->vfe_state.gpgpu_mode = 0;
3717 }
3718
3719 static void
3720 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3721                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3722 {
3723     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3724     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3725     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3726
3727     if (scoreboard_param->walkpat_flag) {
3728         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3729         gpe_context->vfe_desc5.scoreboard0.type = 1;
3730
3731         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3732         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3733
3734         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3735         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3736
3737         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3738         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3739
3740         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3741         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3742     } else {
3743         // Scoreboard 0
3744         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3745         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3746
3747         // Scoreboard 1
3748         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3749         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3750
3751         // Scoreboard 2
3752         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3753         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3754
3755         // Scoreboard 3
3756         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3757         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3758
3759         // Scoreboard 4
3760         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3761         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3762
3763         // Scoreboard 5
3764         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3765         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3766
3767         // Scoreboard 6
3768         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3769         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3770
3771         // Scoreboard 7
3772         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3773         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3774     }
3775 }
3776
3777 #define VP9_MI_BLOCK_MASK     0x07
3778 #define VP9_VME_REF_WIN       48
3779
3780 static VAStatus
3781 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3782                               struct encode_state *encode_state,
3783                               struct intel_encoder_context *encoder_context)
3784 {
3785     struct i965_driver_data *i965 = i965_driver_data(ctx);
3786     struct gen9_vp9_state *vp9_state;
3787     VAEncPictureParameterBufferVP9  *pic_param;
3788     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3789     VAEncSequenceParameterBufferVP9 *seq_param;
3790     struct object_surface *obj_surface;
3791     struct object_buffer *obj_buffer;
3792     struct gen9_surface_vp9 *vp9_priv_surface;
3793
3794     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3795
3796     if (!encode_state->pic_param_ext ||
3797         !encode_state->pic_param_ext->buffer) {
3798         return VA_STATUS_ERROR_INVALID_PARAMETER;
3799     }
3800     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3801
3802     if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
3803         pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
3804         pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
3805         pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
3806         return VA_STATUS_ERROR_INVALID_PARAMETER;
3807
3808     obj_buffer = BUFFER(pic_param->coded_buf);
3809
3810     if (!obj_buffer ||
3811         !obj_buffer->buffer_store ||
3812         !obj_buffer->buffer_store->bo)
3813         return VA_STATUS_ERROR_INVALID_PARAMETER;
3814
3815     encode_state->coded_buf_object = obj_buffer;
3816
3817     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3818
3819     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3820
3821     if (!encode_state->reconstructed_object ||
3822         !encode_state->input_yuv_object)
3823         return VA_STATUS_ERROR_INVALID_PARAMETER;
3824
3825     vp9_state->curr_frame = pic_param->reconstructed_frame;
3826     vp9_state->ref_frame_flag = 0;
3827     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3828         pic_param->pic_flags.bits.intra_only) {
3829         /* this will be regarded as I-frame type */
3830         vp9_state->picture_coding_type = 0;
3831         vp9_state->last_ref_obj = NULL;
3832         vp9_state->golden_ref_obj = NULL;
3833         vp9_state->alt_ref_obj = NULL;
3834     } else {
3835         vp9_state->picture_coding_type = 1;
3836         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3837                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3838
3839         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3840         vp9_state->last_ref_obj = obj_surface;
3841         if (!obj_surface ||
3842             !obj_surface->bo ||
3843             !obj_surface->private_data) {
3844             vp9_state->last_ref_obj = NULL;
3845             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3846         }
3847
3848         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3849         vp9_state->golden_ref_obj = obj_surface;
3850         if (!obj_surface ||
3851             !obj_surface->bo ||
3852             !obj_surface->private_data) {
3853             vp9_state->golden_ref_obj = NULL;
3854             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3855         }
3856
3857         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3858         vp9_state->alt_ref_obj = obj_surface;
3859         if (!obj_surface ||
3860             !obj_surface->bo ||
3861             !obj_surface->private_data) {
3862             vp9_state->alt_ref_obj = NULL;
3863             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3864         }
3865
3866         /* remove the duplicated flag and ref frame list */
3867         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3868             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3869                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3870                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3871                 vp9_state->golden_ref_obj = NULL;
3872             }
3873
3874             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3875                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3876                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3877                 vp9_state->alt_ref_obj = NULL;
3878             }
3879         }
3880
3881         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3882             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3883                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3884                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3885                 vp9_state->alt_ref_obj = NULL;
3886             }
3887         }
3888
3889         if (vp9_state->ref_frame_flag == 0)
3890             return VA_STATUS_ERROR_INVALID_PARAMETER;
3891     }
3892
3893     seg_param = NULL;
3894     if (pic_param->pic_flags.bits.segmentation_enabled) {
3895         if (!encode_state->q_matrix ||
3896             !encode_state->q_matrix->buffer) {
3897             return VA_STATUS_ERROR_INVALID_PARAMETER;
3898         }
3899         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3900                            encode_state->q_matrix->buffer;
3901     }
3902
3903     seq_param = NULL;
3904     if (encode_state->seq_param_ext &&
3905         encode_state->seq_param_ext->buffer)
3906         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3907
3908     if (!seq_param) {
3909         seq_param = &vp9_state->bogus_seq_param;
3910     }
3911
3912     vp9_state->pic_param = pic_param;
3913     vp9_state->segment_param = seg_param;
3914     vp9_state->seq_param = seq_param;
3915
3916     obj_surface = encode_state->reconstructed_object;
3917     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3918         pic_param->frame_height_dst > obj_surface->orig_height)
3919         return VA_STATUS_ERROR_INVALID_SURFACE;
3920
3921     if (!vp9_state->dys_enabled &&
3922          ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3923           (pic_param->frame_height_src != pic_param->frame_height_dst)))
3924         return VA_STATUS_ERROR_UNIMPLEMENTED;
3925
3926     if (vp9_state->brc_enabled) {
3927         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3928             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3929
3930             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3931                 !encoder_context->brc.bits_per_second[0])
3932                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3933
3934             vp9_state->gop_size = encoder_context->brc.gop_size;
3935             vp9_state->framerate = encoder_context->brc.framerate[0];
3936
3937             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3938                 !encoder_context->brc.target_percentage[0]) {
3939                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3940                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3941                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3942             } else {
3943                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3944                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3945                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3946                     vp9_state->min_bit_rate = 0;
3947                 else
3948                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3949             }
3950
3951             if (encoder_context->brc.hrd_buffer_size)
3952                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3953             else if (encoder_context->brc.window_size)
3954                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3955             else
3956                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3957             if (encoder_context->brc.hrd_initial_buffer_fullness)
3958                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3959             else
3960                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3961         }
3962     }
3963
3964     vp9_state->frame_width = pic_param->frame_width_dst;
3965     vp9_state->frame_height = pic_param->frame_height_dst;
3966
3967     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3968     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3969
3970     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3971     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3972
3973     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3974     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3975
3976     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3977     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3978     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3979     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3980
3981     vp9_state->dys_in_use = 0;
3982     if(pic_param->frame_width_src != pic_param->frame_width_dst ||
3983        pic_param->frame_height_src != pic_param->frame_height_dst)
3984         vp9_state->dys_in_use = 1;
3985     vp9_state->dys_ref_frame_flag = 0;
3986     /* check the dys setting. The dys is supported by default. */
3987     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3988         !pic_param->pic_flags.bits.intra_only) {
3989         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3990
3991         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3992              vp9_state->last_ref_obj) {
3993             obj_surface = vp9_state->last_ref_obj;
3994             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3995
3996             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3997                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3998                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3999         }
4000         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
4001              vp9_state->golden_ref_obj) {
4002             obj_surface = vp9_state->golden_ref_obj;
4003             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
4004
4005             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
4006                 vp9_state->frame_height == vp9_priv_surface->frame_height)
4007                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
4008         }
4009         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
4010              vp9_state->alt_ref_obj) {
4011             obj_surface = vp9_state->alt_ref_obj;
4012             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
4013
4014             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
4015                 vp9_state->frame_height == vp9_priv_surface->frame_height)
4016                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
4017         }
4018         if (vp9_state->dys_ref_frame_flag)
4019             vp9_state->dys_in_use = 1;
4020     }
4021
4022     if (vp9_state->hme_supported) {
4023         vp9_state->hme_enabled = 1;
4024     } else {
4025         vp9_state->hme_enabled = 0;
4026     }
4027
4028     if (vp9_state->b16xme_supported) {
4029         vp9_state->b16xme_enabled = 1;
4030     } else {
4031         vp9_state->b16xme_enabled = 0;
4032     }
4033
4034     /* disable HME/16xME if the size is too small */
4035     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
4036         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
4037         vp9_state->hme_enabled = 0;
4038         vp9_state->b16xme_enabled = 0;
4039     }
4040
4041     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
4042         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
4043         vp9_state->b16xme_enabled = 0;
4044
4045     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
4046         pic_param->pic_flags.bits.intra_only) {
4047         vp9_state->hme_enabled = 0;
4048         vp9_state->b16xme_enabled = 0;
4049     }
4050
4051     vp9_state->mbenc_keyframe_dist_enabled = 0;
4052     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
4053         vp9_state->brc_distortion_buffer_supported)
4054         vp9_state->mbenc_keyframe_dist_enabled = 1;
4055
4056     return VA_STATUS_SUCCESS;
4057 }
4058
4059 static VAStatus
4060 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
4061                               struct encode_state *encode_state,
4062                               struct intel_encoder_context *encoder_context)
4063 {
4064     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4065     struct vp9_surface_param surface_param;
4066     struct gen9_vp9_state *vp9_state;
4067     VAEncPictureParameterBufferVP9  *pic_param;
4068     struct object_surface *obj_surface;
4069     struct gen9_surface_vp9 *vp9_surface;
4070     int driver_header_flag = 0;
4071     VAStatus va_status;
4072
4073     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4074
4075     if (!vp9_state || !vp9_state->pic_param)
4076         return VA_STATUS_ERROR_INVALID_PARAMETER;
4077
4078     pic_param = vp9_state->pic_param;
4079
4080     /* this is to check whether the driver should generate the uncompressed header */
4081     driver_header_flag = 1;
4082     if (encode_state->packed_header_data_ext &&
4083         encode_state->packed_header_data_ext[0] &&
4084         pic_param->bit_offset_first_partition_size) {
4085         VAEncPackedHeaderParameterBuffer *param = NULL;
4086
4087         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
4088
4089         if (param->type == VAEncPackedHeaderRawData) {
4090             char *header_data;
4091             unsigned int length_in_bits;
4092
4093             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
4094             length_in_bits = param->bit_length;
4095             driver_header_flag = 0;
4096
4097             vp9_state->frame_header.bit_offset_first_partition_size =
4098                           pic_param->bit_offset_first_partition_size;
4099             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
4100             vp9_state->alias_insert_data = header_data;
4101
4102             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
4103             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
4104             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
4105             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
4106             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
4107             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
4108         }
4109     }
4110
4111     if (driver_header_flag) {
4112         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
4113         intel_write_uncompressed_header(encode_state,
4114                                         VAProfileVP9Profile0,
4115                                         vme_context->frame_header_data,
4116                                         &vp9_state->header_length,
4117                                         &vp9_state->frame_header);
4118         vp9_state->alias_insert_data = vme_context->frame_header_data;
4119     }
4120
4121     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
4122                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4123     if (va_status != VA_STATUS_SUCCESS)
4124         return va_status;
4125
4126     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
4127                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4128
4129     if (va_status != VA_STATUS_SUCCESS)
4130         return va_status;
4131
4132     surface_param.frame_width = vp9_state->frame_width;
4133     surface_param.frame_height = vp9_state->frame_height;
4134     va_status = gen9_vp9_init_check_surfaces(ctx,
4135                                              encode_state->reconstructed_object,
4136                                              &surface_param);
4137
4138     {
4139         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
4140
4141         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
4142     }
4143     if (vp9_state->dys_in_use &&
4144         (pic_param->frame_width_src != pic_param->frame_width_dst ||
4145          pic_param->frame_height_src != pic_param->frame_height_dst)) {
4146         surface_param.frame_width = pic_param->frame_width_dst;
4147         surface_param.frame_height = pic_param->frame_height_dst;
4148         va_status = gen9_vp9_check_dys_surfaces(ctx,
4149                                     encode_state->reconstructed_object,
4150                                     &surface_param);
4151
4152         if (va_status)
4153             return va_status;
4154     }
4155
4156     if (vp9_state->dys_ref_frame_flag) {
4157         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
4158              vp9_state->last_ref_obj) {
4159             obj_surface = vp9_state->last_ref_obj;
4160             surface_param.frame_width = vp9_state->frame_width;
4161             surface_param.frame_height = vp9_state->frame_height;
4162             va_status = gen9_vp9_check_dys_surfaces(ctx,
4163                                     obj_surface,
4164                                     &surface_param);
4165
4166             if (va_status)
4167                 return va_status;
4168         }
4169         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
4170              vp9_state->golden_ref_obj) {
4171             obj_surface = vp9_state->golden_ref_obj;
4172             surface_param.frame_width = vp9_state->frame_width;
4173             surface_param.frame_height = vp9_state->frame_height;
4174             va_status = gen9_vp9_check_dys_surfaces(ctx,
4175                                     obj_surface,
4176                                     &surface_param);
4177
4178             if (va_status)
4179                 return va_status;
4180         }
4181         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
4182              vp9_state->alt_ref_obj) {
4183             obj_surface = vp9_state->alt_ref_obj;
4184             surface_param.frame_width = vp9_state->frame_width;
4185             surface_param.frame_height = vp9_state->frame_height;
4186             va_status = gen9_vp9_check_dys_surfaces(ctx,
4187                                     obj_surface,
4188                                     &surface_param);
4189
4190             if (va_status)
4191                 return va_status;
4192         }
4193     }
4194
4195     if (va_status != VA_STATUS_SUCCESS)
4196         return va_status;
4197     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4198
4199     return VA_STATUS_SUCCESS;
4200 }
4201
4202 static VAStatus
4203 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4204                               struct encode_state *encode_state,
4205                               struct intel_encoder_context *encoder_context)
4206 {
4207     struct i965_driver_data *i965 = i965_driver_data(ctx);
4208     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4209     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4210     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4211     struct gpe_dynamic_state_parameter ds_param;
4212     int i;
4213
4214     /*
4215      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4216      * MBEnc first
4217      */
4218     for (i = 0; i < NUM_VP9_MBENC; i++) {
4219         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4220     }
4221
4222     /*
4223      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4224      * curbe_buffer.
4225      */
4226     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4227            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4228     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4229                                                "mbenc_dys",
4230                                                ds_param.bo_size,
4231                                                0x1000);
4232     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4233
4234     ds_param.bo = mbenc_context->mbenc_bo_dys;
4235     ds_param.curbe_offset = 0;
4236     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4237     for (i = 0; i < NUM_VP9_MBENC; i++) {
4238         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4239                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4240
4241         gen8_gpe_context_set_dynamic_buffer(ctx,
4242                                             &mbenc_context->gpe_contexts[i],
4243                                             &ds_param);
4244     }
4245
4246     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4247     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4248
4249     return VA_STATUS_SUCCESS;
4250 }
4251
4252 static VAStatus
4253 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4254                               struct encode_state *encode_state,
4255                               struct intel_encoder_context *encoder_context)
4256 {
4257     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4258     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4259
4260     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4261     mbenc_context->mbenc_bo_dys = NULL;
4262
4263     return VA_STATUS_SUCCESS;
4264 }
4265
4266 static VAStatus
4267 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4268                               struct encode_state *encode_state,
4269                               struct intel_encoder_context *encoder_context)
4270 {
4271     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4272     struct gen9_vp9_state *vp9_state;
4273     int i;
4274
4275     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4276
4277     if (!vp9_state || !vp9_state->pic_param)
4278         return VA_STATUS_ERROR_INVALID_PARAMETER;
4279
4280     if (vp9_state->dys_in_use) {
4281         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4282     }
4283
4284     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4285         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4286     }
4287
4288     if (vp9_state->picture_coding_type == KEY_FRAME) {
4289         for (i = 0; i < 2; i++)
4290             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4291     }
4292
4293     if (vp9_state->hme_supported) {
4294         gen9_vp9_scaling_kernel(ctx, encode_state,
4295                                 encoder_context,
4296                                 0);
4297         if (vp9_state->b16xme_supported) {
4298             gen9_vp9_scaling_kernel(ctx, encode_state,
4299                                     encoder_context,
4300                                     1);
4301         }
4302     }
4303
4304     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4305         if (vp9_state->b16xme_enabled)
4306             gen9_vp9_me_kernel(ctx, encode_state,
4307                                encoder_context,
4308                                1);
4309
4310         gen9_vp9_me_kernel(ctx, encode_state,
4311                            encoder_context,
4312                            0);
4313     }
4314
4315     if (vp9_state->brc_enabled) {
4316         if (vp9_state->mbenc_keyframe_dist_enabled)
4317             gen9_vp9_brc_intra_dist_kernel(ctx,
4318                                            encode_state,
4319                                            encoder_context);
4320
4321         gen9_vp9_brc_update_kernel(ctx, encode_state,
4322                                    encoder_context);
4323     }
4324
4325     if (vp9_state->picture_coding_type == KEY_FRAME) {
4326         gen9_vp9_mbenc_kernel(ctx, encode_state,
4327                               encoder_context,
4328                               VP9_MEDIA_STATE_MBENC_I_32x32);
4329         gen9_vp9_mbenc_kernel(ctx, encode_state,
4330                               encoder_context,
4331                               VP9_MEDIA_STATE_MBENC_I_16x16);
4332     } else {
4333         gen9_vp9_mbenc_kernel(ctx, encode_state,
4334                               encoder_context,
4335                               VP9_MEDIA_STATE_MBENC_P);
4336     }
4337
4338     gen9_vp9_mbenc_kernel(ctx, encode_state,
4339                           encoder_context,
4340                           VP9_MEDIA_STATE_MBENC_TX);
4341
4342     vp9_state->curr_mode_decision_index ^= 1;
4343     if (vp9_state->brc_enabled) {
4344         vp9_state->brc_inited = 1;
4345         vp9_state->brc_reset = 0;
4346     }
4347
4348     return VA_STATUS_SUCCESS;
4349 }
4350
4351 static VAStatus
4352 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4353                        VAProfile profile,
4354                        struct encode_state *encode_state,
4355                        struct intel_encoder_context *encoder_context)
4356 {
4357     VAStatus va_status;
4358     struct gen9_vp9_state *vp9_state;
4359
4360     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4361
4362     if (!vp9_state)
4363         return VA_STATUS_ERROR_INVALID_CONTEXT;
4364
4365     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4366     if (va_status != VA_STATUS_SUCCESS)
4367         return va_status;
4368
4369     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4370                             encoder_context,
4371                             !vp9_state->brc_allocated);
4372
4373     if (va_status != VA_STATUS_SUCCESS)
4374         return va_status;
4375     vp9_state->brc_allocated = 1;
4376
4377     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4378
4379     if (va_status != VA_STATUS_SUCCESS)
4380         return va_status;
4381
4382     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4383     if (va_status != VA_STATUS_SUCCESS)
4384         return va_status;
4385
4386     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4387     if (va_status != VA_STATUS_SUCCESS)
4388         return va_status;
4389
4390     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4391
4392     return VA_STATUS_SUCCESS;
4393 }
4394
4395 static void
4396 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4397 {
4398     int i;
4399
4400     for (i = 0; i < NUM_VP9_BRC; i++)
4401         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4402 }
4403
4404 static void
4405 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4406 {
4407     int i;
4408
4409     for (i = 0; i < NUM_VP9_SCALING; i++)
4410         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4411 }
4412
4413 static void
4414 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4415 {
4416     gen8_gpe_context_destroy(&me_context->gpe_context);
4417 }
4418
4419 static void
4420 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4421 {
4422     int i;
4423
4424     for (i = 0; i < NUM_VP9_MBENC; i++)
4425         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4426     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4427     mbenc_context->mbenc_bo_size = 0;
4428 }
4429
4430 static void
4431 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4432 {
4433     gen8_gpe_context_destroy(&dys_context->gpe_context);
4434 }
4435
4436 static void
4437 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4438 {
4439     gen9_vp9_free_resources(vme_context);
4440     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4441     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4442     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4443     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4444     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4445
4446     return;
4447 }
4448
/*
 * Destroy callback taking the VP9 encoder context as an opaque pointer.
 * Tears down all kernel contexts and resources, then frees the context
 * itself. A NULL context is ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *enc_ctx = context;

    if (enc_ctx) {
        gen9_vme_kernel_context_destroy_vp9(enc_ctx);
        free(enc_ctx);
    }
}
4463
4464 static void
4465 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4466                                    struct gen9_encoder_context_vp9 *vme_context,
4467                                    struct vp9_scaling_context *scaling_context)
4468 {
4469     struct i965_gpe_context *gpe_context = NULL;
4470     struct vp9_encoder_kernel_parameter kernel_param;
4471     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4472     struct i965_kernel scale_kernel;
4473
4474     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4475     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4476     kernel_param.sampler_size = 0;
4477
4478     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4479     scoreboard_param.mask = 0xFF;
4480     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4481     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4482     scoreboard_param.walkpat_flag = 0;
4483
4484     gpe_context = &scaling_context->gpe_contexts[0];
4485     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4486     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4487
4488     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4489     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4490     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4491                            VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4492
4493     memset(&scale_kernel, 0, sizeof(scale_kernel));
4494
4495     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4496                                          sizeof(media_vp9_kernels),
4497                                          INTEL_VP9_ENC_SCALING4X,
4498                                          0,
4499                                          &scale_kernel);
4500
4501     gen8_gpe_load_kernels(ctx,
4502                           gpe_context,
4503                           &scale_kernel,
4504                           1);
4505
4506     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4507     kernel_param.inline_data_size = 0;
4508     kernel_param.sampler_size = 0;
4509
4510     gpe_context = &scaling_context->gpe_contexts[1];
4511     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4512     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4513
4514     memset(&scale_kernel, 0, sizeof(scale_kernel));
4515
4516     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4517                                          sizeof(media_vp9_kernels),
4518                                          INTEL_VP9_ENC_SCALING2X,
4519                                          0,
4520                                          &scale_kernel);
4521
4522     gen8_gpe_load_kernels(ctx,
4523                           gpe_context,
4524                           &scale_kernel,
4525                           1);
4526
4527     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4528     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4529     return;
4530 }
4531
4532 static void
4533 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4534                              struct gen9_encoder_context_vp9 *vme_context,
4535                              struct vp9_me_context *me_context)
4536 {
4537     struct i965_gpe_context *gpe_context = NULL;
4538     struct vp9_encoder_kernel_parameter kernel_param;
4539     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4540     struct i965_kernel scale_kernel;
4541
4542     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4543     kernel_param.inline_data_size = 0;
4544     kernel_param.sampler_size = 0;
4545
4546     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4547     scoreboard_param.mask = 0xFF;
4548     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4549     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4550     scoreboard_param.walkpat_flag = 0;
4551
4552     gpe_context = &me_context->gpe_context;
4553     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4554     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4555
4556     memset(&scale_kernel, 0, sizeof(scale_kernel));
4557
4558     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4559                                          sizeof(media_vp9_kernels),
4560                                          INTEL_VP9_ENC_ME,
4561                                          0,
4562                                          &scale_kernel);
4563
4564     gen8_gpe_load_kernels(ctx,
4565                           gpe_context,
4566                           &scale_kernel,
4567                           1);
4568
4569     return;
4570 }
4571
4572 static void
4573 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4574                                  struct gen9_encoder_context_vp9 *vme_context,
4575                                  struct vp9_mbenc_context *mbenc_context)
4576 {
4577     struct i965_gpe_context *gpe_context = NULL;
4578     struct vp9_encoder_kernel_parameter kernel_param;
4579     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4580     int i;
4581     struct i965_kernel scale_kernel;
4582
4583     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4584     kernel_param.inline_data_size = 0;
4585     kernel_param.sampler_size = 0;
4586
4587     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4588     scoreboard_param.mask = 0xFF;
4589     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4590     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4591
4592     for (i = 0; i < NUM_VP9_MBENC; i++) {
4593         gpe_context = &mbenc_context->gpe_contexts[i];
4594
4595         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4596             (i == VP9_MBENC_IDX_INTER)) {
4597             scoreboard_param.walkpat_flag = 1;
4598         } else
4599             scoreboard_param.walkpat_flag = 0;
4600
4601         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4602         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4603
4604         memset(&scale_kernel, 0, sizeof(scale_kernel));
4605
4606         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4607                                          sizeof(media_vp9_kernels),
4608                                          INTEL_VP9_ENC_MBENC,
4609                                          i,
4610                                          &scale_kernel);
4611
4612         gen8_gpe_load_kernels(ctx,
4613                               gpe_context,
4614                               &scale_kernel,
4615                               1);
4616     }
4617 }
4618
4619 static void
4620 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4621                               struct gen9_encoder_context_vp9 *vme_context,
4622                               struct vp9_brc_context *brc_context)
4623 {
4624     struct i965_gpe_context *gpe_context = NULL;
4625     struct vp9_encoder_kernel_parameter kernel_param;
4626     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4627     int i;
4628     struct i965_kernel scale_kernel;
4629
4630     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4631     kernel_param.inline_data_size = 0;
4632     kernel_param.sampler_size = 0;
4633
4634     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4635     scoreboard_param.mask = 0xFF;
4636     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4637     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4638
4639     for (i = 0; i < NUM_VP9_BRC; i++) {
4640         gpe_context = &brc_context->gpe_contexts[i];
4641         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4642         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4643
4644         memset(&scale_kernel, 0, sizeof(scale_kernel));
4645
4646         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4647                                          sizeof(media_vp9_kernels),
4648                                          INTEL_VP9_ENC_BRC,
4649                                          i,
4650                                          &scale_kernel);
4651
4652         gen8_gpe_load_kernels(ctx,
4653                               gpe_context,
4654                               &scale_kernel,
4655                               1);
4656     }
4657 }
4658
4659 static void
4660 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4661                                struct gen9_encoder_context_vp9 *vme_context,
4662                                struct vp9_dys_context *dys_context)
4663 {
4664     struct i965_gpe_context *gpe_context = NULL;
4665     struct vp9_encoder_kernel_parameter kernel_param;
4666     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4667     struct i965_kernel scale_kernel;
4668
4669     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4670     kernel_param.inline_data_size = 0;
4671     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4672
4673     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4674     scoreboard_param.mask = 0xFF;
4675     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4676     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4677     scoreboard_param.walkpat_flag = 0;
4678
4679     gpe_context = &dys_context->gpe_context;
4680     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4681     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4682
4683     memset(&scale_kernel, 0, sizeof(scale_kernel));
4684
4685     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4686                                          sizeof(media_vp9_kernels),
4687                                          INTEL_VP9_ENC_DYS,
4688                                          0,
4689                                          &scale_kernel);
4690
4691     gen8_gpe_load_kernels(ctx,
4692                           gpe_context,
4693                           &scale_kernel,
4694                           1);
4695
4696     return;
4697 }
4698
4699 static Bool
4700 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4701                                    struct intel_encoder_context *encoder_context,
4702                                    struct gen9_encoder_context_vp9 *vme_context)
4703 {
4704     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4705     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4706     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4707     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4708     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4709
4710     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4711     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4712     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4713     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4714
4715     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4716
4717     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4718     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4719     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4720     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4721     return true;
4722 }
4723
4724 static
4725 void gen9_vp9_write_compressed_element(char *buffer,
4726                                        int index,
4727                                        int prob,
4728                                        bool value)
4729 {
4730     struct vp9_compressed_element *base_element, *vp9_element;
4731     base_element = (struct vp9_compressed_element *)buffer;
4732
4733     vp9_element = base_element + (index >> 1);
4734     if (index % 2) {
4735         vp9_element->b_valid = 1;
4736         vp9_element->b_probdiff_select = 1;
4737         vp9_element->b_prob_select = (prob == 252) ? 1: 0;
4738         vp9_element->b_bin = value;
4739     } else {
4740         vp9_element->a_valid = 1;
4741         vp9_element->a_probdiff_select = 1;
4742         vp9_element->a_prob_select = (prob == 252) ? 1: 0;
4743         vp9_element->a_bin = value;
4744     }
4745 }
4746
4747 static void
4748 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4749                                             struct intel_encoder_context *encoder_context)
4750 {
4751     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4752     VAEncPictureParameterBufferVP9 *pic_param;
4753     struct gen9_vp9_state *vp9_state;
4754     char *buffer;
4755     int i;
4756
4757     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4758
4759     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4760         return;
4761
4762     pic_param = vp9_state->pic_param;
4763     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4764         (pic_param->pic_flags.bits.intra_only) ||
4765          pic_param->pic_flags.bits.error_resilient_mode) {
4766         /* reset current frame_context */
4767         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4768         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4769             pic_param->pic_flags.bits.error_resilient_mode ||
4770             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4771             for (i = 0; i < 4; i++)
4772                 memcpy(&vp9_state->vp9_frame_ctx[i],
4773                        &vp9_state->vp9_current_fc,
4774                        sizeof(FRAME_CONTEXT));
4775         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4776             i = pic_param->pic_flags.bits.frame_context_idx;
4777             memcpy(&vp9_state->vp9_frame_ctx[i],
4778                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4779         }
4780         /* reset the frame_ctx_idx = 0 */
4781         vp9_state->frame_ctx_idx = 0;
4782     } else {
4783         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4784     }
4785
4786     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4787     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4788
4789     if (!buffer)
4790         return;
4791
4792     /* write tx_size */
4793     if ((pic_param->luma_ac_qindex == 0) &&
4794         (pic_param->luma_dc_qindex_delta == 0) &&
4795         (pic_param->chroma_ac_qindex_delta == 0) &&
4796         (pic_param->chroma_dc_qindex_delta == 0)) {
4797         /* lossless flag */
4798         /* nothing is needed */
4799             gen9_vp9_write_compressed_element(buffer,
4800                                           0, 128, 0);
4801             gen9_vp9_write_compressed_element(buffer,
4802                                           1, 128, 0);
4803             gen9_vp9_write_compressed_element(buffer,
4804                                           2, 128, 0);
4805     } else {
4806         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4807             gen9_vp9_write_compressed_element(buffer,
4808                                           0, 128, 1);
4809             gen9_vp9_write_compressed_element(buffer,
4810                                           1, 128, 1);
4811             gen9_vp9_write_compressed_element(buffer,
4812                                           2, 128, 1);
4813         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4814             gen9_vp9_write_compressed_element(buffer,
4815                                           0, 128, 1);
4816             gen9_vp9_write_compressed_element(buffer,
4817                                           1, 128, 1);
4818             gen9_vp9_write_compressed_element(buffer,
4819                                           2, 128, 0);
4820         } else {
4821             unsigned int tx_mode;
4822
4823             tx_mode = vp9_state->tx_mode;
4824             gen9_vp9_write_compressed_element(buffer,
4825                                           0, 128, ((tx_mode) & 2));
4826             gen9_vp9_write_compressed_element(buffer,
4827                                           1, 128, ((tx_mode) & 1));
4828             gen9_vp9_write_compressed_element(buffer,
4829                                           2, 128, 0);
4830         }
4831
4832         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4833
4834             gen9_vp9_write_compressed_element(buffer,
4835                                           3, 128, 0);
4836
4837             gen9_vp9_write_compressed_element(buffer,
4838                                           7, 128, 0);
4839
4840             gen9_vp9_write_compressed_element(buffer,
4841                                           15, 128, 0);
4842         }
4843     }
4844      /*Setup all the input&output object*/
4845
4846     {
4847         /* update the coeff_update flag */
4848         gen9_vp9_write_compressed_element(buffer,
4849                                       27, 128, 0);
4850         gen9_vp9_write_compressed_element(buffer,
4851                                       820, 128, 0);
4852         gen9_vp9_write_compressed_element(buffer,
4853                                       1613, 128, 0);
4854         gen9_vp9_write_compressed_element(buffer,
4855                                       2406, 128, 0);
4856     }
4857
4858
4859     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only)
4860     {
4861         bool allow_comp = !(
4862             (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4863             (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4864             );
4865
4866         if (allow_comp)
4867         {
4868             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4869                 gen9_vp9_write_compressed_element(buffer,
4870                                           3271, 128, 1);
4871                 gen9_vp9_write_compressed_element(buffer,
4872                                           3272, 128, 1);
4873             }
4874             else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4875                 gen9_vp9_write_compressed_element(buffer,
4876                                           3271, 128, 1);
4877                 gen9_vp9_write_compressed_element(buffer,
4878                                           3272, 128, 0);
4879             }
4880             else {
4881
4882                 gen9_vp9_write_compressed_element(buffer,
4883                                           3271, 128, 0);
4884                 gen9_vp9_write_compressed_element(buffer,
4885                                           3272, 128, 0);
4886             }
4887         }
4888     }
4889
4890     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4891 }
4892
4893
4894 static void
4895 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4896                            struct encode_state *encode_state,
4897                            struct intel_encoder_context *encoder_context,
4898                            struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4899 {
4900     struct intel_batchbuffer *batch = encoder_context->base.batch;
4901
4902     BEGIN_BCS_BATCH(batch, 6);
4903
4904     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4905     OUT_BCS_BATCH(batch,
4906                   (pipe_mode_param->stream_out << 12) |
4907                   (pipe_mode_param->codec_mode << 5) |
4908                   (0 << 3) | /* disable Pic Status / Error Report */
4909                   (pipe_mode_param->stream_out << 2) |
4910                   HCP_CODEC_SELECT_ENCODE);
4911     OUT_BCS_BATCH(batch, 0);
4912     OUT_BCS_BATCH(batch, 0);
4913     OUT_BCS_BATCH(batch, (1 << 6));
4914     OUT_BCS_BATCH(batch, 0);
4915
4916     ADVANCE_BCS_BATCH(batch);
4917 }
4918
4919 static void
4920 gen9_vp9_add_surface_state(VADriverContextP ctx,
4921                        struct encode_state *encode_state,
4922                        struct intel_encoder_context *encoder_context,
4923                        hcp_surface_state *hcp_state)
4924 {
4925     struct intel_batchbuffer *batch = encoder_context->base.batch;
4926     if (!hcp_state)
4927         return;
4928
4929     BEGIN_BCS_BATCH(batch, 3);
4930     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4931     OUT_BCS_BATCH(batch,
4932                   (hcp_state->dw1.surface_id << 28) |
4933                   (hcp_state->dw1.surface_pitch - 1)
4934                  );
4935     OUT_BCS_BATCH(batch,
4936                   (hcp_state->dw2.surface_format << 28) |
4937                   (hcp_state->dw2.y_cb_offset)
4938                  );
4939     ADVANCE_BCS_BATCH(batch);
4940 }
4941
4942 static void
4943 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4944                                  struct encode_state *encode_state,
4945                                  struct intel_encoder_context *encoder_context)
4946 {
4947     struct intel_batchbuffer *batch = encoder_context->base.batch;
4948     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4949     struct gen9_vp9_state *vp9_state;
4950     unsigned int i;
4951     struct object_surface *obj_surface;
4952
4953     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4954
4955     if (!vp9_state || !vp9_state->pic_param)
4956          return;
4957
4958
4959     BEGIN_BCS_BATCH(batch, 104);
4960
4961     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4962
4963     obj_surface = encode_state->reconstructed_object;
4964
4965     /* reconstructed obj_surface is already checked. So this is skipped */
4966     /* DW 1..3 decoded surface */
4967     OUT_RELOC64(batch,
4968                 obj_surface->bo,
4969                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4970                 0);
4971     OUT_BCS_BATCH(batch, 0);
4972
4973     /* DW 4..6 deblocking line */
4974     OUT_RELOC64(batch,
4975                 pak_context->res_deblocking_filter_line_buffer.bo,
4976                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4977                 0);
4978     OUT_BCS_BATCH(batch, 0);
4979
4980     /* DW 7..9 deblocking tile line */
4981     OUT_RELOC64(batch,
4982                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4983                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4984                 0);
4985     OUT_BCS_BATCH(batch, 0);
4986
4987     /* DW 10..12 deblocking tile col */
4988     OUT_RELOC64(batch,
4989                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4990                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4991                 0);
4992     OUT_BCS_BATCH(batch, 0);
4993
4994     /* DW 13..15 metadata line */
4995     OUT_RELOC64(batch,
4996                 pak_context->res_metadata_line_buffer.bo,
4997                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4998                 0);
4999     OUT_BCS_BATCH(batch, 0);
5000
5001     /* DW 16..18 metadata tile line */
5002     OUT_RELOC64(batch,
5003                 pak_context->res_metadata_tile_line_buffer.bo,
5004                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5005                 0);
5006     OUT_BCS_BATCH(batch, 0);
5007
5008     /* DW 19..21 metadata tile col */
5009     OUT_RELOC64(batch,
5010                 pak_context->res_metadata_tile_col_buffer.bo,
5011                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5012                 0);
5013     OUT_BCS_BATCH(batch, 0);
5014
5015     /* DW 22..30 SAO is not used for VP9 */
5016     OUT_BCS_BATCH(batch, 0);
5017     OUT_BCS_BATCH(batch, 0);
5018     OUT_BCS_BATCH(batch, 0);
5019     OUT_BCS_BATCH(batch, 0);
5020     OUT_BCS_BATCH(batch, 0);
5021     OUT_BCS_BATCH(batch, 0);
5022     OUT_BCS_BATCH(batch, 0);
5023     OUT_BCS_BATCH(batch, 0);
5024     OUT_BCS_BATCH(batch, 0);
5025
5026     /* DW 31..33 Current Motion vector temporal buffer */
5027     OUT_RELOC64(batch,
5028                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
5029                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5030                 0);
5031     OUT_BCS_BATCH(batch, 0);
5032
5033     /* DW 34..36 Not used */
5034     OUT_BCS_BATCH(batch, 0);
5035     OUT_BCS_BATCH(batch, 0);
5036     OUT_BCS_BATCH(batch, 0);
5037
5038     /* Only the first three reference_frame is used for VP9 */
5039     /* DW 37..52 for reference_frame */
5040     i = 0;
5041     if (vp9_state->picture_coding_type) {
5042         for (i = 0; i < 3; i++) {
5043
5044             if (pak_context->reference_surfaces[i].bo) {
5045                 OUT_RELOC64(batch,
5046                             pak_context->reference_surfaces[i].bo,
5047                             I915_GEM_DOMAIN_INSTRUCTION, 0,
5048                             0);
5049             } else {
5050                 OUT_BCS_BATCH(batch, 0);
5051                 OUT_BCS_BATCH(batch, 0);
5052             }
5053         }
5054     }
5055
5056     for (; i < 8; i++) {
5057         OUT_BCS_BATCH(batch, 0);
5058         OUT_BCS_BATCH(batch, 0);
5059     }
5060
5061     OUT_BCS_BATCH(batch, 0);
5062
5063     /* DW 54..56 for source input */
5064     OUT_RELOC64(batch,
5065                 pak_context->uncompressed_picture_source.bo,
5066                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5067                 0);
5068     OUT_BCS_BATCH(batch, 0);
5069
5070     /* DW 57..59 StreamOut is not used */
5071     OUT_BCS_BATCH(batch, 0);
5072     OUT_BCS_BATCH(batch, 0);
5073     OUT_BCS_BATCH(batch, 0);
5074
5075     /* DW 60..62. Not used for encoder */
5076     OUT_BCS_BATCH(batch, 0);
5077     OUT_BCS_BATCH(batch, 0);
5078     OUT_BCS_BATCH(batch, 0);
5079
5080     /* DW 63..65. ILDB Not used for encoder */
5081     OUT_BCS_BATCH(batch, 0);
5082     OUT_BCS_BATCH(batch, 0);
5083     OUT_BCS_BATCH(batch, 0);
5084
5085     /* DW 66..81 For the collocated motion vector temporal buffer */
5086     if (vp9_state->picture_coding_type) {
5087         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
5088         OUT_RELOC64(batch,
5089                 pak_context->res_mv_temporal_buffer[prev_index].bo,
5090                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5091                 0);
5092     } else {
5093         OUT_BCS_BATCH(batch, 0);
5094         OUT_BCS_BATCH(batch, 0);
5095     }
5096
5097     for (i = 1; i < 8; i++) {
5098         OUT_BCS_BATCH(batch, 0);
5099         OUT_BCS_BATCH(batch, 0);
5100     }
5101     OUT_BCS_BATCH(batch, 0);
5102
5103     /* DW 83..85 VP9 prob buffer */
5104     OUT_RELOC64(batch,
5105                 pak_context->res_prob_buffer.bo,
5106                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5107                 0);
5108
5109     OUT_BCS_BATCH(batch, 0);
5110
5111     /* DW 86..88 Segment id buffer */
5112     if (pak_context->res_segmentid_buffer.bo) {
5113         OUT_RELOC64(batch,
5114                     pak_context->res_segmentid_buffer.bo,
5115                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5116                     0);
5117     } else {
5118         OUT_BCS_BATCH(batch, 0);
5119         OUT_BCS_BATCH(batch, 0);
5120     }
5121     OUT_BCS_BATCH(batch, 0);
5122
5123     /* DW 89..91 HVD line rowstore buffer */
5124     OUT_RELOC64(batch,
5125                 pak_context->res_hvd_line_buffer.bo,
5126                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5127                 0);
5128     OUT_BCS_BATCH(batch, 0);
5129
5130     /* DW 92..94 HVD tile line rowstore buffer */
5131     OUT_RELOC64(batch,
5132                 pak_context->res_hvd_tile_line_buffer.bo,
5133                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5134                 0);
5135     OUT_BCS_BATCH(batch, 0);
5136
5137     /* DW 95..97 SAO streamout. Not used for VP9 */
5138     OUT_BCS_BATCH(batch, 0);
5139     OUT_BCS_BATCH(batch, 0);
5140     OUT_BCS_BATCH(batch, 0);
5141
5142     /* reserved for KBL. 98..100 */
5143     OUT_BCS_BATCH(batch, 0);
5144     OUT_BCS_BATCH(batch, 0);
5145     OUT_BCS_BATCH(batch, 0);
5146
5147     /* 101..103 */
5148     OUT_BCS_BATCH(batch, 0);
5149     OUT_BCS_BATCH(batch, 0);
5150     OUT_BCS_BATCH(batch, 0);
5151
5152     ADVANCE_BCS_BATCH(batch);
5153 }
5154
5155 static void
5156 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
5157                                      struct encode_state *encode_state,
5158                                      struct intel_encoder_context *encoder_context)
5159 {
5160     struct intel_batchbuffer *batch = encoder_context->base.batch;
5161     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5162     struct gen9_vp9_state *vp9_state;
5163
5164     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5165
5166     /* to do */
5167     BEGIN_BCS_BATCH(batch, 29);
5168
5169     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
5170
5171     /* indirect bitstream object base */
5172     OUT_BCS_BATCH(batch, 0);
5173     OUT_BCS_BATCH(batch, 0);
5174     OUT_BCS_BATCH(batch, 0);
5175     /* the upper bound of indirect bitstream object */
5176     OUT_BCS_BATCH(batch, 0);
5177     OUT_BCS_BATCH(batch, 0);
5178
5179     /* DW 6: Indirect CU object base address */
5180     OUT_RELOC64(batch,
5181                 pak_context->res_mb_code_surface.bo,
5182                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
5183                 vp9_state->mb_data_offset);
5184     /* default attribute */
5185     OUT_BCS_BATCH(batch, 0);
5186
5187     /* DW 9..11, PAK-BSE */
5188     OUT_RELOC64(batch,
5189                   pak_context->indirect_pak_bse_object.bo,
5190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5191                   pak_context->indirect_pak_bse_object.offset);
5192     OUT_BCS_BATCH(batch, 0);
5193
5194     /* DW 12..13 upper bound */
5195     OUT_RELOC64(batch,
5196                   pak_context->indirect_pak_bse_object.bo,
5197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5198                   pak_context->indirect_pak_bse_object.end_offset);
5199
5200     /* DW 14..16 compressed header buffer */
5201     OUT_RELOC64(batch,
5202                 pak_context->res_compressed_input_buffer.bo,
5203                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5204                 0);
5205     OUT_BCS_BATCH(batch, 0);
5206
5207     /* DW 17..19 prob counter streamout */
5208     OUT_RELOC64(batch,
5209                 pak_context->res_prob_counter_buffer.bo,
5210                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5211                 0);
5212     OUT_BCS_BATCH(batch, 0);
5213
5214     /* DW 20..22 prob delta streamin */
5215     OUT_RELOC64(batch,
5216                 pak_context->res_prob_delta_buffer.bo,
5217                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5218                 0);
5219     OUT_BCS_BATCH(batch, 0);
5220
5221     /* DW 23..25 Tile record streamout */
5222     OUT_RELOC64(batch,
5223                 pak_context->res_tile_record_streamout_buffer.bo,
5224                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5225                 0);
5226     OUT_BCS_BATCH(batch, 0);
5227
5228     /* DW 26..28 CU record streamout */
5229     OUT_RELOC64(batch,
5230                 pak_context->res_cu_stat_streamout_buffer.bo,
5231                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5232                 0);
5233     OUT_BCS_BATCH(batch, 0);
5234
5235     ADVANCE_BCS_BATCH(batch);
5236 }
5237
5238 static void
5239 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5240                            struct encode_state *encode_state,
5241                            struct intel_encoder_context *encoder_context,
5242                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5243 {
5244     struct intel_batchbuffer *batch = encoder_context->base.batch;
5245     uint32_t batch_value, tmp;
5246     VAEncPictureParameterBufferVP9 *pic_param;
5247
5248     if (!encode_state->pic_param_ext ||
5249         !encode_state->pic_param_ext->buffer) {
5250         return;
5251     }
5252
5253     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5254
5255     batch_value = seg_param->seg_flags.bits.segment_reference;
5256     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5257         pic_param->pic_flags.bits.intra_only)
5258         batch_value = 0;
5259
5260     BEGIN_BCS_BATCH(batch, 8);
5261
5262     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5263     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5264     OUT_BCS_BATCH(batch,
5265                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5266                   (batch_value << 1) |
5267                   (seg_param->seg_flags.bits.segment_reference_skipped <<0)
5268                  );
5269
5270     /* DW 3..6 is not used for encoder */
5271     OUT_BCS_BATCH(batch, 0);
5272     OUT_BCS_BATCH(batch, 0);
5273     OUT_BCS_BATCH(batch, 0);
5274     OUT_BCS_BATCH(batch, 0);
5275
5276     /* DW 7 Mode */
5277     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5278     batch_value = tmp;
5279     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5280     batch_value |= (tmp << 16);
5281     OUT_BCS_BATCH(batch, batch_value);
5282
5283     ADVANCE_BCS_BATCH(batch);
5284
5285 }
5286
5287 static void
5288 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5289                                  struct intel_encoder_context *encoder_context,
5290                                  struct i965_gpe_resource *obj_batch_buffer)
5291 {
5292     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5293     struct gen9_vp9_state *vp9_state;
5294     int uncompressed_header_length;
5295     unsigned int *cmd_ptr;
5296     unsigned int dw_length, bits_in_last_dw;
5297
5298     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5299
5300     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5301         return;
5302
5303     uncompressed_header_length = vp9_state->header_length;
5304     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5305
5306     if (!cmd_ptr)
5307         return;
5308
5309     bits_in_last_dw = uncompressed_header_length % 4;
5310     bits_in_last_dw *= 8;
5311
5312     if (bits_in_last_dw == 0)
5313         bits_in_last_dw = 32;
5314
5315     /* get the DWORD length of the inserted_data */
5316     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5317     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5318
5319     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5320                   (0 << 16) | /* the start offset in first DW */
5321                   (0 << 15) |
5322                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5323                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5324                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5325                   (1 << 2) |  /* last header flag. */
5326                   (0 << 1));
5327     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5328
5329     cmd_ptr += dw_length;
5330
5331     *cmd_ptr++ = MI_NOOP;
5332     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5333     i965_unmap_gpe_resource(obj_batch_buffer);
5334 }
5335
5336 static void
5337 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5338                            struct encode_state *encode_state,
5339                            struct intel_encoder_context *encoder_context)
5340 {
5341     struct intel_batchbuffer *batch = encoder_context->base.batch;
5342     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5343     struct object_surface *obj_surface;
5344     VAEncPictureParameterBufferVP9 *pic_param;
5345     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5346     struct gen9_vp9_state *vp9_state;
5347     struct gen9_surface_vp9 *vp9_priv_surface;
5348     int i;
5349     struct gen9_hcpe_pipe_mode_select_param mode_param;
5350     hcp_surface_state hcp_surface;
5351     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5352     int segment_count;
5353
5354     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5355
5356     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5357         return;
5358
5359     pic_param = vp9_state->pic_param;
5360     seg_param = vp9_state->segment_param;
5361
5362     if (vp9_state->curr_pak_pass == 0)
5363     {
5364         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5365                            &pak_context->res_pak_uncompressed_input_buffer);
5366
5367         // Check if driver already programmed pic state as part of BRC update kernel programming.
5368         if (!vp9_state->brc_enabled)
5369         {
5370             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5371                  encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5372         }
5373     }
5374
5375     if (vp9_state->curr_pak_pass == 0)
5376     {
5377         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5378     }
5379
5380     {
5381         /* copy the frame_context[frame_idx] into curr_frame_context */
5382         memcpy(&vp9_state->vp9_current_fc,
5383                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5384                sizeof(FRAME_CONTEXT));
5385         {
5386             uint8_t *prob_ptr;
5387
5388             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5389
5390             if (!prob_ptr)
5391                 return;
5392
5393             /* copy the current fc to vp9_prob buffer */
5394             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5395             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5396                  pic_param->pic_flags.bits.intra_only) {
5397                  FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5398
5399                  memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5400                         sizeof(vp9_kf_partition_probs));
5401                  memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5402                         sizeof(vp9_kf_uv_mode_prob));
5403             }
5404             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5405         }
5406     }
5407
5408     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5409         /* read image status and insert the conditional end cmd */
5410         /* image ctrl/status is already accessed */
5411         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5412         struct vp9_encode_status_buffer_internal *status_buffer;
5413
5414         status_buffer = &vp9_state->status_buffer;
5415         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5416         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5417         mi_cond_end.bo = status_buffer->bo;
5418         mi_cond_end.compare_data = 0;
5419         mi_cond_end.compare_mask_mode_disabled = 1;
5420         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5421                                                  &mi_cond_end);
5422     }
5423
5424     mode_param.codec_mode = 1;
5425     mode_param.stream_out = 0;
5426     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5427
5428     /* reconstructed surface */
5429     memset(&hcp_surface, 0, sizeof(hcp_surface));
5430     obj_surface = encode_state->reconstructed_object;
5431     hcp_surface.dw1.surface_id = 0;
5432     hcp_surface.dw1.surface_pitch = obj_surface->width;
5433     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5434     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5435     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5436                                &hcp_surface);
5437
5438     /* Input surface */
5439     if (vp9_state->dys_in_use &&
5440         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5441          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5442         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5443         obj_surface = vp9_priv_surface->dys_surface_obj;
5444     } else {
5445         obj_surface = encode_state->input_yuv_object;
5446     }
5447
5448     hcp_surface.dw1.surface_id = 1;
5449     hcp_surface.dw1.surface_pitch = obj_surface->width;
5450     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5451     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5452     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5453                                &hcp_surface);
5454
5455     if (vp9_state->picture_coding_type) {
5456         /* Add surface for last */
5457         if (vp9_state->last_ref_obj) {
5458             obj_surface = vp9_state->last_ref_obj;
5459             hcp_surface.dw1.surface_id = 2;
5460             hcp_surface.dw1.surface_pitch = obj_surface->width;
5461             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5462             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5463             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5464                                &hcp_surface);
5465         }
5466         if (vp9_state->golden_ref_obj) {
5467             obj_surface = vp9_state->golden_ref_obj;
5468             hcp_surface.dw1.surface_id = 3;
5469             hcp_surface.dw1.surface_pitch = obj_surface->width;
5470             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5471             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5472             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5473                                &hcp_surface);
5474         }
5475         if (vp9_state->alt_ref_obj) {
5476             obj_surface = vp9_state->alt_ref_obj;
5477             hcp_surface.dw1.surface_id = 4;
5478             hcp_surface.dw1.surface_pitch = obj_surface->width;
5479             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5480             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5481             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5482                                &hcp_surface);
5483         }
5484     }
5485
5486     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5487
5488     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5489
5490     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5491     memset(&second_level_batch, 0, sizeof(second_level_batch));
5492
5493     if (vp9_state->curr_pak_pass == 0) {
5494         second_level_batch.offset = 0;
5495     } else
5496         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5497
5498     second_level_batch.is_second_level = 1;
5499     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5500
5501     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5502
5503     if (pic_param->pic_flags.bits.segmentation_enabled &&
5504         seg_param)
5505         segment_count = 8;
5506     else {
5507         segment_count = 1;
5508         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5509         seg_param = &tmp_seg_param;
5510     }
5511     for (i = 0; i < segment_count; i++)
5512     {
5513         gen9_pak_vp9_segment_state(ctx, encode_state,
5514                                    encoder_context,
5515                                    &seg_param->seg_data[i], i);
5516     }
5517
5518     /* Insert the uncompressed header buffer */
5519     second_level_batch.is_second_level = 1;
5520     second_level_batch.offset = 0;
5521     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5522
5523     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5524
5525     /* PAK_OBJECT */
5526     second_level_batch.is_second_level = 1;
5527     second_level_batch.offset = 0;
5528     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5529     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5530
5531     return;
5532 }
5533
5534 static void
5535 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5536 {
5537     struct intel_batchbuffer *batch = encoder_context->base.batch;
5538     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5539     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5540     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5541     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5542     struct vp9_encode_status_buffer_internal *status_buffer;
5543     struct gen9_vp9_state *vp9_state;
5544
5545     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5546     if (!vp9_state || !pak_context || !batch)
5547         return;
5548
5549     status_buffer = &(vp9_state->status_buffer);
5550
5551     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5552     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5553
5554     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5555     mi_store_reg_mem_param.bo = status_buffer->bo;
5556     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5557     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5558     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5559
5560     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5561     mi_store_reg_mem_param.offset = 0;
5562     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5563     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5564
5565     /* Read HCP Image status */
5566     mi_store_reg_mem_param.bo = status_buffer->bo;
5567     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5568     mi_store_reg_mem_param.mmio_offset =
5569                                status_buffer->vp9_image_mask_reg_offset;
5570     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5571
5572     mi_store_reg_mem_param.bo = status_buffer->bo;
5573     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5574     mi_store_reg_mem_param.mmio_offset =
5575                                status_buffer->vp9_image_ctrl_reg_offset;
5576     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5577
5578     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5579     mi_store_reg_mem_param.offset = 4;
5580     mi_store_reg_mem_param.mmio_offset =
5581                                status_buffer->vp9_image_ctrl_reg_offset;
5582     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5583
5584     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5585
5586     return;
5587 }
5588
5589 static VAStatus
5590 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5591                               struct encode_state *encode_state,
5592                               struct intel_encoder_context *encoder_context)
5593 {
5594     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5595     struct object_surface *obj_surface;
5596     struct object_buffer *obj_buffer;
5597     struct i965_coded_buffer_segment *coded_buffer_segment;
5598     VAEncPictureParameterBufferVP9 *pic_param;
5599     struct gen9_vp9_state *vp9_state;
5600     dri_bo *bo;
5601     int i;
5602
5603     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5604     if (!vp9_state ||
5605         !vp9_state->pic_param)
5606         return VA_STATUS_ERROR_INVALID_PARAMETER;
5607
5608     pic_param = vp9_state->pic_param;
5609
5610     /* reconstructed surface */
5611     obj_surface = encode_state->reconstructed_object;
5612     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5613
5614     pak_context->reconstructed_object.bo = obj_surface->bo;
5615     dri_bo_reference(pak_context->reconstructed_object.bo);
5616
5617     /* set vp9 reference frames */
5618     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5619         if (pak_context->reference_surfaces[i].bo)
5620             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5621         pak_context->reference_surfaces[i].bo = NULL;
5622     }
5623
5624     /* Three reference frames are enough for VP9 */
5625     if (pic_param->pic_flags.bits.frame_type &&
5626         !pic_param->pic_flags.bits.intra_only) {
5627         for (i = 0; i < 3; i++) {
5628             obj_surface = encode_state->reference_objects[i];
5629             if (obj_surface && obj_surface->bo) {
5630                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5631                 dri_bo_reference(obj_surface->bo);
5632             }
5633         }
5634     }
5635
5636     /* input YUV surface */
5637     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5638     pak_context->uncompressed_picture_source.bo = NULL;
5639     obj_surface = encode_state->reconstructed_object;
5640     if (vp9_state->dys_in_use &&
5641         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5642          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5643         struct gen9_surface_vp9 *vp9_priv_surface =
5644             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5645         obj_surface = vp9_priv_surface->dys_surface_obj;
5646     } else
5647         obj_surface = encode_state->input_yuv_object;
5648
5649     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5650     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5651
5652     /* coded buffer */
5653     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5654     pak_context->indirect_pak_bse_object.bo = NULL;
5655     obj_buffer = encode_state->coded_buf_object;
5656     bo = obj_buffer->buffer_store->bo;
5657     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5658     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5659     pak_context->indirect_pak_bse_object.bo = bo;
5660     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5661
5662     /* set the internal flag to 0 to indicate the coded size is unknown */
5663     dri_bo_map(bo, 1);
5664     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5665     coded_buffer_segment->mapped = 0;
5666     coded_buffer_segment->codec = encoder_context->codec;
5667     coded_buffer_segment->status_support = 1;
5668     dri_bo_unmap(bo);
5669
5670     return VA_STATUS_SUCCESS;
5671 }
5672
/*
 * BRC-prepare hook for the VP9 PAK stage.
 *
 * This implementation deliberately does nothing; it exists so that it can
 * be installed as the encoder context's mfc_brc_prepare callback.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    /* Both arguments are intentionally unused. */
    (void)encode_state;
    (void)encoder_context;
}
5678
5679 static void
5680 gen9_vp9_pak_context_destroy(void *context)
5681 {
5682     struct gen9_encoder_context_vp9 *pak_context = context;
5683     int i;
5684
5685     dri_bo_unreference(pak_context->reconstructed_object.bo);
5686     pak_context->reconstructed_object.bo = NULL;
5687
5688     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5689     pak_context->uncompressed_picture_source.bo = NULL;
5690
5691     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5692     pak_context->indirect_pak_bse_object.bo = NULL;
5693
5694     for (i = 0; i < 8; i++){
5695         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5696         pak_context->reference_surfaces[i].bo = NULL;
5697     }
5698
5699     /* vme & pak same the same structure, so don't free the context here */
5700 }
5701
5702 static VAStatus
5703 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5704                        VAProfile profile,
5705                        struct encode_state *encode_state,
5706                        struct intel_encoder_context *encoder_context)
5707 {
5708     struct i965_driver_data *i965 = i965_driver_data(ctx);
5709     struct intel_batchbuffer *batch = encoder_context->base.batch;
5710     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5711     VAStatus va_status;
5712     struct gen9_vp9_state *vp9_state;
5713     VAEncPictureParameterBufferVP9 *pic_param;
5714     int i;
5715
5716     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5717
5718     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5719         return VA_STATUS_ERROR_INVALID_PARAMETER;
5720
5721     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5722
5723     if (va_status != VA_STATUS_SUCCESS)
5724         return va_status;
5725
5726     if (i965->intel.has_bsd2)
5727         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5728     else
5729         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5730
5731     intel_batchbuffer_emit_mi_flush(batch);
5732
5733     BEGIN_BCS_BATCH(batch, 64);
5734     for (i = 0; i < 64; i++)
5735         OUT_BCS_BATCH(batch, MI_NOOP);
5736
5737     ADVANCE_BCS_BATCH(batch);
5738
5739     for (vp9_state->curr_pak_pass = 0;
5740          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5741          vp9_state->curr_pak_pass++) {
5742
5743         if (vp9_state->curr_pak_pass == 0) {
5744             /* Initialize the VP9 Image Ctrl reg for the first pass */
5745             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5746             struct vp9_encode_status_buffer_internal *status_buffer;
5747
5748             status_buffer = &(vp9_state->status_buffer);
5749             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5750             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5751             mi_load_reg_imm.data = 0;
5752             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5753         }
5754         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5755         gen9_vp9_read_mfc_status(ctx, encoder_context);
5756     }
5757
5758     intel_batchbuffer_end_atomic(batch);
5759     intel_batchbuffer_flush(batch);
5760
5761     pic_param = vp9_state->pic_param;
5762     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5763     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5764     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5765     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5766     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5767     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5768     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5769     vp9_state->frame_number++;
5770     vp9_state->curr_mv_temporal_index ^= 1;
5771     vp9_state->first_frame = 0;
5772
5773     return VA_STATUS_SUCCESS;
5774 }
5775
5776 Bool
5777 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5778 {
5779     struct gen9_encoder_context_vp9 *vme_context = NULL;
5780     struct gen9_vp9_state *vp9_state = NULL;
5781
5782     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5783     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5784
5785     if (!vme_context || !vp9_state) {
5786         if (vme_context)
5787             free(vme_context);
5788         if (vp9_state)
5789             free(vp9_state);
5790         return false;
5791     }
5792
5793     encoder_context->enc_priv_state = vp9_state;
5794     vme_context->enc_priv_state = vp9_state;
5795
5796     /* Initialize the features that are supported by VP9 */
5797     vme_context->hme_supported = 1;
5798     vme_context->use_hw_scoreboard = 1;
5799     vme_context->use_hw_non_stalling_scoreboard = 1;
5800
5801     vp9_state->tx_mode = TX_MODE_SELECT;
5802     vp9_state->multi_ref_qp_check = 0;
5803     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5804     vp9_state->num_pak_passes = 1;
5805     vp9_state->hme_supported = vme_context->hme_supported;
5806     vp9_state->b16xme_supported = 1;
5807
5808     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5809         encoder_context->rate_control_mode != VA_RC_CQP) {
5810         vp9_state->brc_enabled = 1;
5811         vp9_state->brc_distortion_buffer_supported = 1;
5812         vp9_state->brc_constant_buffer_supported = 1;
5813         vp9_state->num_pak_passes = 4;
5814     }
5815     vp9_state->dys_enabled = 1; /* this is supported by default */
5816     vp9_state->first_frame = 1;
5817
5818     /* the definition of status buffer offset for VP9 */
5819     {
5820         struct vp9_encode_status_buffer_internal *status_buffer;
5821         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5822
5823         status_buffer = &vp9_state->status_buffer;
5824         memset(status_buffer, 0,
5825                sizeof(struct vp9_encode_status_buffer_internal));
5826
5827         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5828         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5829         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5830         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5831
5832         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5833         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5834         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5835     }
5836
5837     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5838
5839     encoder_context->vme_context = vme_context;
5840     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5841     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5842
5843     return true;
5844 }
5845
5846 static VAStatus
5847 gen9_vp9_get_coded_status(VADriverContextP ctx,
5848                           struct intel_encoder_context *encoder_context,
5849                           struct i965_coded_buffer_segment *coded_buf_seg)
5850 {
5851     struct vp9_encode_status *vp9_encode_status;
5852
5853     if (!encoder_context || !coded_buf_seg)
5854         return VA_STATUS_ERROR_INVALID_BUFFER;
5855
5856     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5857     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5858
5859     /* One VACodedBufferSegment for VP9 will be added later.
5860      * It will be linked to the next element of coded_buf_seg->base.next
5861      */
5862
5863     return VA_STATUS_SUCCESS;
5864 }
5865
5866 Bool
5867 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5868 {
5869     /* VME & PAK share the same context */
5870     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5871
5872     if (!pak_context)
5873         return false;
5874
5875     encoder_context->mfc_context = pak_context;
5876     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5877     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5878     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5879     encoder_context->get_status = gen9_vp9_get_coded_status;
5880     return true;
5881 }