OSDN Git Service

vp9encoder: encoder to handle properly CSC on input surface
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
/* NOTE(review): presumably the cap on surfaces tracked by the VP9 encoder;
 * it is not referenced in this part of the file -- confirm against users.
 */
#define MAX_VP9_ENCODER_SURFACES    64

#define MAX_URB_SIZE                4096    /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT 1

/* NOTE(review): looks like the kbps -> bps scale factor used by BRC -- confirm. */
#define VP9_BRC_KBPS                1000

/* BRC kernel mode selectors; each value occupies its own bit. */
#define BRC_KERNEL_CBR              0x0010
#define BRC_KERNEL_VBR              0x0020
#define BRC_KERNEL_AVBR             0x0040
#define BRC_KERNEL_CQL              0x0080

#define DEFAULT_MOCS                0x02

/* Size of one picture-state buffer unit; gen9_vp9_allocate_resources()
 * allocates four of these per picture-state buffer.
 */
#define VP9_PIC_STATE_BUFFER_SIZE   192
63
/*
 * One entry of the kernel header table found at the start of the VP9
 * kernel binary.  It packs a 6-bit reserved field and a 26-bit kernel
 * start pointer into a single 32-bit unit.
 *
 * intel_vp9_get_kernel_header_and_size() shifts kernel_start_pointer left
 * by 6 to turn it into a byte offset from the start of the binary.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved             : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
69
70 typedef struct _intel_vp9_kernel_header {
71     int nKernelCount;
72     intel_kernel_header PLY_DSCALE;
73     intel_kernel_header VP9_ME_P;
74     intel_kernel_header VP9_Enc_I_32x32;
75     intel_kernel_header VP9_Enc_I_16x16;
76     intel_kernel_header VP9_Enc_P;
77     intel_kernel_header VP9_Enc_TX;
78     intel_kernel_header VP9_DYS;
79
80     intel_kernel_header VP9BRC_Intra_Distortion;
81     intel_kernel_header VP9BRC_Init;
82     intel_kernel_header VP9BRC_Reset;
83     intel_kernel_header VP9BRC_Update;
84 } intel_vp9_kernel_header;
85
/* Dynamic-scaling selector bits; each flag occupies its own bit so that
 * they can be OR-ed together.
 */
#define DYS_1X_FLAG     0x01
#define DYS_4X_FLAG     0x02
#define DYS_16X_FLAG    0x04
89
/*
 * Frame dimensions handed to the surface-checking helpers
 * (gen9_vp9_init_check_surfaces() and gen9_vp9_check_dys_surfaces()),
 * which derive the sizes of the down-scaled surfaces from them.
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* frame width; divided by 4 and 16 for the scaled surfaces */
    uint32_t frame_height;  /* frame height; divided by 4 and 16 for the scaled surfaces */
};
94
/*
 * Convert a two's-complement integer into sign-magnitude form.
 *
 * The result is sign_bit_pos bits wide: bit (sign_bit_pos - 1) is the sign
 * flag (set when val is negative) and the bits below it hold |val|,
 * truncated to (sign_bit_pos - 1) bits.
 *
 * sign_bit_pos must lie in [1, 32]; any other width returns 0 instead of
 * performing an out-of-range shift.  The arithmetic is done on unsigned
 * values so that a 32-bit wide result and val == INT_MIN do not trigger
 * signed-overflow undefined behaviour.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t sign_mask, magnitude;

    if (sign_bit_pos < 1 || sign_bit_pos > 32)
        return 0;

    sign_mask = (uint32_t)1 << (sign_bit_pos - 1);

    /* 0u - (uint32_t)val is |val| modulo 2^32, also for INT_MIN */
    magnitude = (val < 0) ? (0u - (uint32_t)val) : (uint32_t)val;
    magnitude &= sign_mask - 1;

    return (val < 0) ? (sign_mask | magnitude) : magnitude;
}
109
110 static bool
111 intel_vp9_get_kernel_header_and_size(
112     void                             *pvbinary,
113     int                              binary_size,
114     INTEL_VP9_ENC_OPERATION          operation,
115     int                              krnstate_idx,
116     struct i965_kernel               *ret_kernel)
117 {
118     typedef uint32_t BIN_PTR[4];
119
120     char *bin_start;
121     intel_vp9_kernel_header      *pkh_table;
122     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
123     int next_krnoffset;
124
125     if (!pvbinary || !ret_kernel)
126         return false;
127
128     bin_start = (char *)pvbinary;
129     pkh_table = (intel_vp9_kernel_header *)pvbinary;
130     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
131     next_krnoffset = binary_size;
132
133     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X))
134     {
135         pcurr_header = &pkh_table->PLY_DSCALE;
136     }
137     else if (operation == INTEL_VP9_ENC_ME)
138     {
139         pcurr_header = &pkh_table->VP9_ME_P;
140     }
141     else if (operation == INTEL_VP9_ENC_MBENC)
142     {
143         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
144     }
145     else if (operation == INTEL_VP9_ENC_DYS)
146     {
147         pcurr_header = &pkh_table->VP9_DYS;
148     }
149     else if (operation == INTEL_VP9_ENC_BRC)
150     {
151         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
152     }
153     else
154     {
155         return false;
156     }
157
158     pcurr_header += krnstate_idx;
159     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
160
161     pnext_header = (pcurr_header + 1);
162     if (pnext_header < pinvalid_entry)
163     {
164         next_krnoffset = pnext_header->kernel_start_pointer << 6;
165     }
166     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
167
168     return true;
169 }
170
171
172 static void
173 gen9_free_surfaces_vp9(void **data)
174 {
175     struct gen9_surface_vp9 *vp9_surface;
176
177     if (!data || !*data)
178         return;
179
180     vp9_surface = *data;
181
182     if (vp9_surface->scaled_4x_surface_obj) {
183         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
184         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
185         vp9_surface->scaled_4x_surface_obj = NULL;
186     }
187
188     if (vp9_surface->scaled_16x_surface_obj) {
189         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
190         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
191         vp9_surface->scaled_16x_surface_obj = NULL;
192     }
193
194     if (vp9_surface->dys_4x_surface_obj) {
195         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
196         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
197         vp9_surface->dys_4x_surface_obj = NULL;
198     }
199
200     if (vp9_surface->dys_16x_surface_obj) {
201         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
202         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
203         vp9_surface->dys_16x_surface_obj = NULL;
204     }
205
206     if (vp9_surface->dys_surface_obj) {
207         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
208         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
209         vp9_surface->dys_surface_obj = NULL;
210     }
211
212     free(vp9_surface);
213
214     *data = NULL;
215
216     return;
217 }
218
219 static VAStatus
220 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
221                              struct object_surface *obj_surface,
222                              struct vp9_surface_param *surface_param)
223 {
224     struct i965_driver_data *i965 = i965_driver_data(ctx);
225     struct gen9_surface_vp9 *vp9_surface;
226     int downscaled_width_4x, downscaled_height_4x;
227     int downscaled_width_16x, downscaled_height_16x;
228
229     if (!obj_surface || !obj_surface->bo)
230         return VA_STATUS_ERROR_INVALID_SURFACE;
231
232     if (obj_surface->private_data &&
233         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
234         obj_surface->free_private_data(&obj_surface->private_data);
235         obj_surface->private_data = NULL;
236     }
237
238     if (obj_surface->private_data) {
239         /* if the frame width/height is already the same as the expected,
240          * it is unncessary to reallocate it.
241          */
242         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
243         if (vp9_surface->frame_width >= surface_param->frame_width ||
244             vp9_surface->frame_height >= surface_param->frame_height)
245             return VA_STATUS_SUCCESS;
246
247         obj_surface->free_private_data(&obj_surface->private_data);
248         obj_surface->private_data = NULL;
249         vp9_surface = NULL;
250     }
251
252     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
253
254     if (!vp9_surface)
255         return VA_STATUS_ERROR_ALLOCATION_FAILED;
256
257     vp9_surface->ctx = ctx;
258     obj_surface->private_data = vp9_surface;
259     obj_surface->free_private_data = gen9_free_surfaces_vp9;
260
261     vp9_surface->frame_width = surface_param->frame_width;
262     vp9_surface->frame_height = surface_param->frame_height;
263
264     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
265     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
266
267     i965_CreateSurfaces(ctx,
268                         downscaled_width_4x,
269                         downscaled_height_4x,
270                         VA_RT_FORMAT_YUV420,
271                         1,
272                         &vp9_surface->scaled_4x_surface_id);
273
274     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
275
276     if (!vp9_surface->scaled_4x_surface_obj) {
277         return VA_STATUS_ERROR_ALLOCATION_FAILED;
278     }
279
280     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
281                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
282
283     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
284     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
285     i965_CreateSurfaces(ctx,
286                         downscaled_width_16x,
287                         downscaled_height_16x,
288                         VA_RT_FORMAT_YUV420,
289                         1,
290                         &vp9_surface->scaled_16x_surface_id);
291     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
292
293     if (!vp9_surface->scaled_16x_surface_obj) {
294         return VA_STATUS_ERROR_ALLOCATION_FAILED;
295     }
296
297     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
298                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
299
300     return VA_STATUS_SUCCESS;
301 }
302
303 static VAStatus
304 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
305                             struct object_surface *obj_surface,
306                             struct vp9_surface_param *surface_param)
307 {
308     struct i965_driver_data *i965 = i965_driver_data(ctx);
309     struct gen9_surface_vp9 *vp9_surface;
310     int dys_width_4x, dys_height_4x;
311     int dys_width_16x, dys_height_16x;
312
313     /* As this is handled after the surface checking, it is unnecessary
314      * to check the surface bo and vp9_priv_surface again
315      */
316
317     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
318
319     if (!vp9_surface)
320         return VA_STATUS_ERROR_INVALID_SURFACE;
321
322     /* if the frame_width/height of dys_surface is the same as
323      * the expected, it is unnecessary to allocate it again
324      */
325     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
326         vp9_surface->dys_frame_width == surface_param->frame_width)
327         return VA_STATUS_SUCCESS;
328
329     if (vp9_surface->dys_4x_surface_obj) {
330         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
331         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
332         vp9_surface->dys_4x_surface_obj = NULL;
333     }
334
335     if (vp9_surface->dys_16x_surface_obj) {
336         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
337         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
338         vp9_surface->dys_16x_surface_obj = NULL;
339     }
340
341     if (vp9_surface->dys_surface_obj) {
342         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
343         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
344         vp9_surface->dys_surface_obj = NULL;
345     }
346
347     vp9_surface->dys_frame_width = surface_param->frame_width;
348     vp9_surface->dys_frame_height = surface_param->frame_height;
349
350     i965_CreateSurfaces(ctx,
351                         surface_param->frame_width,
352                         surface_param->frame_height,
353                         VA_RT_FORMAT_YUV420,
354                         1,
355                         &vp9_surface->dys_surface_id);
356     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
357
358     if (!vp9_surface->dys_surface_obj) {
359         return VA_STATUS_ERROR_ALLOCATION_FAILED;
360     }
361
362     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
363                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
364
365     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
366     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
367
368     i965_CreateSurfaces(ctx,
369                         dys_width_4x,
370                         dys_height_4x,
371                         VA_RT_FORMAT_YUV420,
372                         1,
373                         &vp9_surface->dys_4x_surface_id);
374
375     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
376
377     if (!vp9_surface->dys_4x_surface_obj) {
378         return VA_STATUS_ERROR_ALLOCATION_FAILED;
379     }
380
381     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
382                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
383
384     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
385     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
386     i965_CreateSurfaces(ctx,
387                         dys_width_16x,
388                         dys_height_16x,
389                         VA_RT_FORMAT_YUV420,
390                         1,
391                         &vp9_surface->dys_16x_surface_id);
392     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
393
394     if (!vp9_surface->dys_16x_surface_obj) {
395         return VA_STATUS_ERROR_ALLOCATION_FAILED;
396     }
397
398     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
399                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
400
401     return VA_STATUS_SUCCESS;
402 }
403
404 static VAStatus
405 gen9_vp9_allocate_resources(VADriverContextP ctx,
406                             struct encode_state *encode_state,
407                             struct intel_encoder_context *encoder_context,
408                             int allocate)
409 {
410     struct i965_driver_data *i965 = i965_driver_data(ctx);
411     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
412     struct gen9_vp9_state *vp9_state;
413     int allocate_flag, i;
414     int res_size;
415     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
416     unsigned int width, height;
417
418     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
419
420     if (!vp9_state || !vp9_state->pic_param)
421         return VA_STATUS_ERROR_INVALID_PARAMETER;
422
423     /* the buffer related with BRC is not changed. So it is allocated
424      * based on the input parameter
425      */
426     if (allocate) {
427         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
428         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
429         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
430         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
431         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
432         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
433         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
434         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
435         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
436         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
437         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
438
439         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
440                                  &vme_context->res_brc_history_buffer,
441                                  VP9_BRC_HISTORY_BUFFER_SIZE,
442                                  "Brc History buffer");
443         if (!allocate_flag)
444             goto failed_allocation;
445         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446                                  &vme_context->res_brc_const_data_buffer,
447                                  VP9_BRC_CONSTANTSURFACE_SIZE,
448                                  "Brc Constant buffer");
449         if (!allocate_flag)
450             goto failed_allocation;
451
452         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
453            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
454         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455                                  &vme_context->res_brc_mbenc_curbe_write_buffer,
456                                  res_size,
457                                  "Brc Curbe write");
458         if (!allocate_flag)
459             goto failed_allocation;
460
461         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463                                  &vme_context->res_pic_state_brc_read_buffer,
464                                  res_size,
465                                  "Pic State Brc_read");
466         if (!allocate_flag)
467             goto failed_allocation;
468
469         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
470         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471                                  &vme_context->res_pic_state_brc_write_hfw_read_buffer,
472                                  res_size,
473                                  "Pic State Brc_write Hfw_Read");
474         if (!allocate_flag)
475             goto failed_allocation;
476
477         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
478         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479                                  &vme_context->res_pic_state_hfw_write_buffer,
480                                  res_size,
481                                  "Pic State Hfw Write");
482         if (!allocate_flag)
483             goto failed_allocation;
484
485         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
486         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487                                  &vme_context->res_seg_state_brc_read_buffer,
488                                  res_size,
489                                  "Segment state brc_read");
490         if (!allocate_flag)
491             goto failed_allocation;
492
493         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
494         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495                                  &vme_context->res_seg_state_brc_write_buffer,
496                                  res_size,
497                                  "Segment state brc_write");
498         if (!allocate_flag)
499             goto failed_allocation;
500
501         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
502         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503                                  &vme_context->res_brc_bitstream_size_buffer,
504                                  res_size,
505                                  "Brc bitstream buffer");
506         if (!allocate_flag)
507             goto failed_allocation;
508
509         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
510         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
511                                  &vme_context->res_brc_hfw_data_buffer,
512                                  res_size,
513                                  "mfw Brc data");
514         if (!allocate_flag)
515             goto failed_allocation;
516
517         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
518         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
519                                  &vme_context->res_brc_mmdk_pak_buffer,
520                                  res_size,
521                                  "Brc mmdk_pak");
522         if (!allocate_flag)
523             goto failed_allocation;
524     }
525
526     /* If the width/height of allocated buffer is greater than the expected,
527      * it is unnecessary to allocate it again
528      */
529     if (vp9_state->res_width >= vp9_state->frame_width &&
530         vp9_state->res_height >= vp9_state->frame_height) {
531
532         return VA_STATUS_SUCCESS;
533     }
534     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
535     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
536     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
537
538     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
539     res_size = frame_width_in_sb * 64;
540     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541                                  &vme_context->res_hvd_line_buffer,
542                                  res_size,
543                                  "VP9 hvd line line");
544     if (!allocate_flag)
545         goto failed_allocation;
546
547     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
548     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
549                                  &vme_context->res_hvd_tile_line_buffer,
550                                  res_size,
551                                  "VP9 hvd tile_line line");
552     if (!allocate_flag)
553         goto failed_allocation;
554
555     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
556     res_size = frame_width_in_sb * 18 * 64;
557     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
558                                  &vme_context->res_deblocking_filter_line_buffer,
559                                  res_size,
560                                  "VP9 deblocking filter line");
561     if (!allocate_flag)
562         goto failed_allocation;
563
564     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
565     res_size = frame_width_in_sb * 18 * 64;
566     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
567                                  &vme_context->res_deblocking_filter_tile_line_buffer,
568                                  res_size,
569                                  "VP9 deblocking tile line");
570     if (!allocate_flag)
571         goto failed_allocation;
572
573     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
574     res_size = frame_height_in_sb * 17 * 64;
575     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
576                                  &vme_context->res_deblocking_filter_tile_col_buffer,
577                                  res_size,
578                                  "VP9 deblocking tile col");
579     if (!allocate_flag)
580         goto failed_allocation;
581
582     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
583     res_size = frame_width_in_sb * 5 * 64;
584     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
585                                  &vme_context->res_metadata_line_buffer,
586                                  res_size,
587                                  "VP9 metadata line");
588     if (!allocate_flag)
589         goto failed_allocation;
590
591     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
592     res_size = frame_width_in_sb * 5 * 64;
593     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
594                                  &vme_context->res_metadata_tile_line_buffer,
595                                  res_size,
596                                  "VP9 metadata tile line");
597     if (!allocate_flag)
598         goto failed_allocation;
599
600     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
601     res_size = frame_height_in_sb * 5 * 64;
602     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
603                                  &vme_context->res_metadata_tile_col_buffer,
604                                  res_size,
605                                  "VP9 metadata tile col");
606     if (!allocate_flag)
607         goto failed_allocation;
608
609     i965_free_gpe_resource(&vme_context->res_prob_buffer);
610     res_size = 2048;
611     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
612                                  &vme_context->res_prob_buffer,
613                                  res_size,
614                                  "VP9 prob");
615     if (!allocate_flag)
616         goto failed_allocation;
617
618     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
619     res_size = frame_sb_num * 64;
620     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
621                                  &vme_context->res_segmentid_buffer,
622                                  res_size,
623                                  "VP9 segment id");
624     if (!allocate_flag)
625         goto failed_allocation;
626
627     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
628
629     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
630     res_size = 29 * 64;
631     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
632                                  &vme_context->res_prob_delta_buffer,
633                                  res_size,
634                                  "VP9 prob delta");
635     if (!allocate_flag)
636         goto failed_allocation;
637
638     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
639
640     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
641     res_size = 29 * 64;
642     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
643                                  &vme_context->res_prob_delta_buffer,
644                                  res_size,
645                                  "VP9 prob delta");
646     if (!allocate_flag)
647         goto failed_allocation;
648
649     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
650     res_size = 32 * 64;
651     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
652                                  &vme_context->res_compressed_input_buffer,
653                                  res_size,
654                                  "VP9 compressed_input buffer");
655     if (!allocate_flag)
656         goto failed_allocation;
657
658     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
659     res_size = 193 * 64;
660     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
661                                  &vme_context->res_prob_counter_buffer,
662                                  res_size,
663                                  "VP9 prob counter");
664     if (!allocate_flag)
665         goto failed_allocation;
666
667     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
668     res_size = frame_sb_num * 64;
669     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
670                                  &vme_context->res_tile_record_streamout_buffer,
671                                  res_size,
672                                  "VP9 tile record stream_out");
673     if (!allocate_flag)
674         goto failed_allocation;
675
676     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
677     res_size = frame_sb_num * 64;
678     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
679                                  &vme_context->res_cu_stat_streamout_buffer,
680                                  res_size,
681                                  "VP9 CU stat stream_out");
682     if (!allocate_flag)
683         goto failed_allocation;
684
685     width = vp9_state->downscaled_width_4x_in_mb * 32;
686     height = vp9_state->downscaled_height_4x_in_mb * 16;
687     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
688     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
689                                  &vme_context->s4x_memv_data_buffer,
690                                  width, height,
691                                  ALIGN(width, 64),
692                                  "VP9 4x MEMV data");
693     if (!allocate_flag)
694         goto failed_allocation;
695
696     width = vp9_state->downscaled_width_4x_in_mb * 8;
697     height = vp9_state->downscaled_height_4x_in_mb * 16;
698     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
699     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
700                                  &vme_context->s4x_memv_distortion_buffer,
701                                  width, height,
702                                  ALIGN(width, 64),
703                                  "VP9 4x MEMV distorion");
704     if (!allocate_flag)
705         goto failed_allocation;
706
707     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
708     height = vp9_state->downscaled_height_16x_in_mb * 16;
709     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
710     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
711                                  &vme_context->s16x_memv_data_buffer,
712                                  width, height,
713                                  width,
714                                  "VP9 16x MEMV data");
715     if (!allocate_flag)
716         goto failed_allocation;
717
718     width = vp9_state->frame_width_in_mb * 16;
719     height = vp9_state->frame_height_in_mb * 8;
720     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
721     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
722                                  &vme_context->res_output_16x16_inter_modes,
723                                  width, height,
724                                  ALIGN(width, 64),
725                                  "VP9 output inter_mode");
726     if (!allocate_flag)
727         goto failed_allocation;
728
729     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
730                16 * 4;
731     for (i = 0; i < 2; i++) {
732         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
733         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
734                                    &vme_context->res_mode_decision[i],
735                                    res_size,
736                                    "VP9 mode decision");
737         if (!allocate_flag)
738             goto failed_allocation;
739
740     }
741
742     res_size = frame_sb_num * 9 * 64;
743     for (i = 0; i < 2; i++) {
744         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
745         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
746                                    &vme_context->res_mv_temporal_buffer[i],
747                                    res_size,
748                                    "VP9 temporal mv");
749         if (!allocate_flag)
750             goto failed_allocation;
751     }
752
753     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
754     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
755     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
756     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
757                                  &vme_context->res_mb_code_surface,
758                                  ALIGN(res_size, 4096),
759                                  "VP9 mb_code surface");
760     if (!allocate_flag)
761         goto failed_allocation;
762
763     res_size = 128;
764     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
765     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
766                                  &vme_context->res_pak_uncompressed_input_buffer,
767                                  ALIGN(res_size, 4096),
768                                  "VP9 pak_uncompressed_input");
769     if (!allocate_flag)
770         goto failed_allocation;
771
772     if (!vme_context->frame_header_data) {
773         /* allocate 512 bytes for generating the uncompressed header */
774         vme_context->frame_header_data = calloc(1, 512);
775     }
776
777     vp9_state->res_width = vp9_state->frame_width;
778     vp9_state->res_height = vp9_state->frame_height;
779
780     return VA_STATUS_SUCCESS;
781
782 failed_allocation:
783     return VA_STATUS_ERROR_ALLOCATION_FAILED;
784 }
785
786 static void
787 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
788 {
789     int i;
790     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
791
792     if (vp9_state->brc_enabled) {
793         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
794         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
795         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
796         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
797         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
798         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
799         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
800         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
801         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
802         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
803         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
804     }
805
806     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
807     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
808     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
809     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
810     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
811     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
812     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
813     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
814     i965_free_gpe_resource(&vme_context->res_prob_buffer);
815     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
816     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
817     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
818     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
819     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
820     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
821     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
822     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
823     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
824     for (i = 0; i < 2; i++) {
825         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
826     }
827
828     for (i = 0; i < 2; i++) {
829         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
830     }
831
832     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
833     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
834     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
835
836     if (vme_context->frame_header_data) {
837         free(vme_context->frame_header_data);
838         vme_context->frame_header_data = NULL;
839     }
840     return;
841 }
842
843 static void
844 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
845                                         struct vp9_encoder_kernel_walker_parameter *kernel_walker_param,
846                                         struct gpe_media_object_walker_parameter *walker_param)
847 {
848     memset(walker_param, 0, sizeof(*walker_param));
849
850     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
851
852     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
853     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
854
855     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
856     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
857
858     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
859     walker_param->global_outer_loop_stride.y = 0;
860
861     walker_param->global_inner_loop_unit.x = 0;
862     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
863
864     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
865     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
866
867     if (kernel_walker_param->no_dependency) {
868         walker_param->scoreboard_mask = 0;
869         walker_param->use_scoreboard = 0;
870         // Raster scan walking pattern
871         walker_param->local_outer_loop_stride.x = 0;
872         walker_param->local_outer_loop_stride.y = 1;
873         walker_param->local_inner_loop_unit.x = 1;
874         walker_param->local_inner_loop_unit.y = 0;
875         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
876         walker_param->local_end.y = 0;
877     } else {
878         walker_param->local_end.x = 0;
879         walker_param->local_end.y = 0;
880
881         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
882             // 45z degree
883             walker_param->scoreboard_mask = 0x0F;
884
885             walker_param->global_loop_exec_count = 0x3FF;
886             walker_param->local_loop_exec_count = 0x3FF;
887
888             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
889             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
890
891             walker_param->global_start.x = 0;
892             walker_param->global_start.y = 0;
893
894             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
895             walker_param->global_outer_loop_stride.y = 0;
896
897             walker_param->global_inner_loop_unit.x = 0;
898             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
899
900             walker_param->block_resolution.x = walker_param->global_resolution.x;
901             walker_param->block_resolution.y = walker_param->global_resolution.y;
902
903             walker_param->local_start.x = 0;
904             walker_param->local_start.y = 0;
905
906             walker_param->local_outer_loop_stride.x = 1;
907             walker_param->local_outer_loop_stride.y = 0;
908
909             walker_param->local_inner_loop_unit.x = -1;
910             walker_param->local_inner_loop_unit.y = 4;
911
912             walker_param->middle_loop_extra_steps = 3;
913             walker_param->mid_loop_unit_x = 0;
914             walker_param->mid_loop_unit_y = 1;
915         } else {
916             // 26 degree
917             walker_param->scoreboard_mask = 0x0F;
918             walker_param->local_outer_loop_stride.x = 1;
919             walker_param->local_outer_loop_stride.y = 0;
920             walker_param->local_inner_loop_unit.x = -2;
921             walker_param->local_inner_loop_unit.y = 1;
922         }
923     }
924 }
925
926 static void
927 gen9_add_2d_gpe_surface(VADriverContextP ctx,
928                         struct i965_gpe_context *gpe_context,
929                         struct object_surface *obj_surface,
930                         int is_uv_surface,
931                         int is_media_block_rw,
932                         unsigned int format,
933                         int index)
934 {
935     struct i965_gpe_resource gpe_resource;
936     struct i965_gpe_surface gpe_surface;
937
938     memset(&gpe_surface, 0, sizeof(gpe_surface));
939
940     i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
941     gpe_surface.gpe_resource = &gpe_resource;
942     gpe_surface.is_2d_surface = 1;
943     gpe_surface.is_uv_surface = !!is_uv_surface;
944     gpe_surface.is_media_block_rw = !!is_media_block_rw;
945
946     gpe_surface.cacheability_control = DEFAULT_MOCS;
947     gpe_surface.format = format;
948
949     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
950     i965_free_gpe_resource(&gpe_resource);
951 }
952
953 static void
954 gen9_add_adv_gpe_surface(VADriverContextP ctx,
955                          struct i965_gpe_context *gpe_context,
956                          struct object_surface *obj_surface,
957                          int index)
958 {
959     struct i965_gpe_resource gpe_resource;
960     struct i965_gpe_surface gpe_surface;
961
962     memset(&gpe_surface, 0, sizeof(gpe_surface));
963
964     i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
965     gpe_surface.gpe_resource = &gpe_resource;
966     gpe_surface.is_adv_surface = 1;
967     gpe_surface.cacheability_control = DEFAULT_MOCS;
968     gpe_surface.v_direction = 2;
969
970     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
971     i965_free_gpe_resource(&gpe_resource);
972 }
973
974 static void
975 gen9_add_buffer_gpe_surface(VADriverContextP ctx,
976                             struct i965_gpe_context *gpe_context,
977                             struct i965_gpe_resource *gpe_buffer,
978                             int is_raw_buffer,
979                             unsigned int size,
980                             unsigned int offset,
981                             int index)
982 {
983     struct i965_gpe_surface gpe_surface;
984
985     memset(&gpe_surface, 0, sizeof(gpe_surface));
986
987     gpe_surface.gpe_resource = gpe_buffer;
988     gpe_surface.is_buffer = 1;
989     gpe_surface.is_raw_buffer = !!is_raw_buffer;
990     gpe_surface.cacheability_control = DEFAULT_MOCS;
991     gpe_surface.size = size;
992     gpe_surface.offset = offset;
993
994     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
995 }
996
997 static void
998 gen9_add_buffer_2d_gpe_surface(VADriverContextP ctx,
999                                struct i965_gpe_context *gpe_context,
1000                                struct i965_gpe_resource *gpe_buffer,
1001                                int is_media_block_rw,
1002                                unsigned int format,
1003                                int index)
1004 {
1005     struct i965_gpe_surface gpe_surface;
1006
1007     memset(&gpe_surface, 0, sizeof(gpe_surface));
1008
1009     gpe_surface.gpe_resource = gpe_buffer;
1010     gpe_surface.is_2d_surface = 1;
1011     gpe_surface.is_media_block_rw = !!is_media_block_rw;
1012     gpe_surface.cacheability_control = DEFAULT_MOCS;
1013     gpe_surface.format = format;
1014
1015     gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
1016 }
1017
1018 static void
1019 gen9_add_dri_buffer_gpe_surface(VADriverContextP ctx,
1020                                 struct i965_gpe_context *gpe_context,
1021                                 dri_bo *bo,
1022                                 int is_raw_buffer,
1023                                 unsigned int size,
1024                                 unsigned int offset,
1025                                 int index)
1026 {
1027     struct i965_gpe_resource gpe_resource;
1028
1029     i965_dri_object_to_buffer_gpe_resource(&gpe_resource, bo);
1030     gen9_add_buffer_gpe_surface(ctx,
1031                                 gpe_context,
1032                                 &gpe_resource,
1033                                 is_raw_buffer,
1034                                 size,
1035                                 offset,
1036                                 index);
1037
1038     i965_free_gpe_resource(&gpe_resource);
1039 }
1040
1041 /*
1042 static void
1043 gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
1044                                    struct i965_gpe_context *gpe_context,
1045                                    dri_bo *bo,
1046                                    unsigned int width,
1047                                    unsigned int height,
1048                                    unsigned int pitch,
1049                                    int is_media_block_rw,
1050                                    unsigned int format,
1051                                    int index)
1052 {
1053     struct i965_gpe_resource gpe_resource;
1054
1055     i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
1056     gen9_add_buffer_2d_gpe_surface(ctx,
1057                                    gpe_context,
1058                                    &gpe_resource,
1059                                    is_media_block_rw,
1060                                    format,
1061                                    index);
1062
1063     i965_free_gpe_resource(&gpe_resource);
1064 }
1065 */
1066
1067 static void
1068 gen9_run_kernel_media_object(VADriverContextP ctx,
1069                              struct intel_encoder_context *encoder_context,
1070                              struct i965_gpe_context *gpe_context,
1071                              int media_function,
1072                              struct gpe_media_object_parameter *param)
1073 {
1074     struct intel_batchbuffer *batch = encoder_context->base.batch;
1075     struct vp9_encode_status_buffer_internal *status_buffer;
1076     struct gen9_vp9_state *vp9_state;
1077     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1078
1079     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1080     if (!vp9_state || !batch)
1081         return;
1082
1083     intel_batchbuffer_start_atomic(batch, 0x1000);
1084
1085     status_buffer = &(vp9_state->status_buffer);
1086     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1087     mi_store_data_imm.bo = status_buffer->bo;
1088     mi_store_data_imm.offset = status_buffer->media_index_offset;
1089     mi_store_data_imm.dw0 = media_function;
1090     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1091
1092     intel_batchbuffer_emit_mi_flush(batch);
1093     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1094     gen8_gpe_media_object(ctx, gpe_context, batch, param);
1095     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1096
1097     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1098
1099     intel_batchbuffer_end_atomic(batch);
1100
1101     intel_batchbuffer_flush(batch);
1102 }
1103
1104 static void
1105 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
1106                                     struct intel_encoder_context *encoder_context,
1107                                     struct i965_gpe_context *gpe_context,
1108                                     int media_function,
1109                                     struct gpe_media_object_walker_parameter *param)
1110 {
1111     struct intel_batchbuffer *batch = encoder_context->base.batch;
1112     struct vp9_encode_status_buffer_internal *status_buffer;
1113     struct gen9_vp9_state *vp9_state;
1114     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1115
1116     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1117     if (!vp9_state || !batch)
1118         return;
1119
1120     intel_batchbuffer_start_atomic(batch, 0x1000);
1121
1122     intel_batchbuffer_emit_mi_flush(batch);
1123
1124     status_buffer = &(vp9_state->status_buffer);
1125     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1126     mi_store_data_imm.bo = status_buffer->bo;
1127     mi_store_data_imm.offset = status_buffer->media_index_offset;
1128     mi_store_data_imm.dw0 = media_function;
1129     gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1130
1131     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1132     gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
1133     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1134
1135     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1136
1137     intel_batchbuffer_end_atomic(batch);
1138
1139     intel_batchbuffer_flush(batch);
1140 }
1141
1142 static
1143 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
1144                             struct encode_state *encode_state,
1145                             struct i965_gpe_context *gpe_context,
1146                             struct intel_encoder_context *encoder_context,
1147                             struct gen9_vp9_brc_curbe_param *param)
1148 {
1149     VAEncSequenceParameterBufferVP9 *seq_param;
1150     VAEncPictureParameterBufferVP9  *pic_param;
1151     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
1152     vp9_brc_curbe_data      *cmd;
1153     double                  dbps_ratio, dInputBitsPerFrame;
1154     struct gen9_vp9_state *vp9_state;
1155
1156     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1157
1158     pic_param      = param->ppic_param;
1159     seq_param      = param->pseq_param;
1160     segment_param  = param->psegment_param;
1161
1162     cmd = gen8p_gpe_context_map_curbe(gpe_context);
1163
1164     if (!cmd)
1165         return;
1166
1167     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1168
1169     if (!vp9_state->dys_enabled)
1170     {
1171         cmd->dw0.frame_width  = pic_param->frame_width_src;
1172         cmd->dw0.frame_height = pic_param->frame_height_src;
1173     }
1174     else
1175     {
1176         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1177         cmd->dw0.frame_height = pic_param->frame_height_dst;
1178     }
1179
1180     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1181     cmd->dw1.segmentation_enable  = 0;
1182     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1183     cmd->dw1.num_tlevels          = 1;
1184
1185     switch(param->media_state_type)
1186     {
1187         case VP9_MEDIA_STATE_BRC_INIT_RESET:
1188         {
1189             cmd->dw3.max_level_ratiot0 = 0;
1190             cmd->dw3.max_level_ratiot1 = 0;
1191             cmd->dw3.max_level_ratiot2 = 0;
1192             cmd->dw3.max_level_ratiot3 = 0;
1193
1194             cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1195                                seq_param->max_frame_height;
1196             cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1197             cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1198             cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1199                                                   VP9_BRC_KBPS;
1200             cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1201                                                   VP9_BRC_KBPS;
1202             cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1203                                                   VP9_BRC_KBPS;
1204             cmd->dw10.frame_ratem           = vp9_state->frame_rate;
1205             cmd->dw11.frame_rated           = 1;
1206
1207             cmd->dw14.avbr_accuracy         = 30;
1208             cmd->dw14.avbr_convergence      = 150;
1209
1210             if (encoder_context->rate_control_mode == VA_RC_CBR)
1211             {
1212                 cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1213                 cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1214                 cmd->dw9.min_bit_rate  = 0;
1215             }
1216             else if (encoder_context->rate_control_mode == VA_RC_VBR)
1217             {
1218                 cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1219             }
1220             else
1221             {
1222                 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1223                 cmd->dw16.cq_level = 30;
1224             }
1225             cmd->dw12.gopp = seq_param->intra_period - 1;
1226
1227             cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1228             cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1229
1230             cmd->dw15.min_qp          = 0;
1231             cmd->dw15.max_qp          = 255;
1232
1233             cmd->dw16.cq_level            = 30;
1234
1235             cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1236             cmd->dw17.brc_overshoot_cbr_pct = 150;
1237
1238             dInputBitsPerFrame = (double)(cmd->dw8.max_bit_rate) / (vp9_state->frame_rate);
1239             dbps_ratio         = dInputBitsPerFrame / ((double)(vp9_state->vbv_buffer_size_in_bit) / 30);
1240             if (dbps_ratio < 0.1)
1241                 dbps_ratio = 0.1;
1242             if (dbps_ratio > 3.5)
1243                 dbps_ratio = 3.5;
1244
1245             *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1246             *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1247
1248             cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1249             cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1250             cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1251             cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1252             cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1253             cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1254             cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1255             cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1256
1257             cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1258             cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1259             cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1260             cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1261             cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1262             cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1263             cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1264             cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1265
1266             cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1267             cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1268             cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1269             cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1270             cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1271             cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1272             cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1273             cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1274
1275             break;
1276         }
1277         case VP9_MEDIA_STATE_BRC_UPDATE:
1278         {
1279             cmd->dw15.min_qp          = 0;
1280             cmd->dw15.max_qp          = 255;
1281
1282             cmd->dw25.frame_number    = param->frame_number;
1283
1284             // Used in dynamic scaling. set to zero for now
1285             cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1286             cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1287
1288             if (pic_param->pic_flags.bits.segmentation_enabled) {
1289                 cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1290                 cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1291                 cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1292                 cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1293
1294                 cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1295                 cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1296                 cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1297                 cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1298             }
1299
1300             //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1301             cmd->dw34.temporal_id                = 0;
1302             cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1303
1304             cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1305             cmd->dw35.sync_async                 = 0;
1306             cmd->dw35.mbrc                       = param->mbbrc_enabled;
1307             if (*param->pbrc_init_current_target_buf_full_in_bits >
1308                  ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1309                 *param->pbrc_init_current_target_buf_full_in_bits -=
1310                      (double)(*param->pbrc_init_reset_buf_size_in_bits);
1311                 cmd->dw35.overflow = 1;
1312             }
1313             else
1314                 cmd->dw35.overflow = 0;
1315
1316             cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1317
1318             cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1319
1320             *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1321
1322             cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1323             cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1324             cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1325
1326             break;
1327         }
1328         case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1329             cmd->dw2.intra_mode_disable        = 0;
1330             break;
1331         default:
1332             break;
1333     }
1334
1335     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1336     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1337     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1338     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1339     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1340     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1341     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1342     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1343     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1344     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1345     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1346     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1347     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1348     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1349
1350     gen8p_gpe_context_unmap_curbe(gpe_context);
1351     return;
1352 }
1353
1354 static void
1355 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1356                                      struct encode_state *encode_state,
1357                                      struct intel_encoder_context *encoder_context,
1358                                      struct i965_gpe_context *gpe_context)
1359 {
1360     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1361
1362     gen9_add_buffer_gpe_surface(ctx,
1363                                 gpe_context,
1364                                 &vme_context->res_brc_history_buffer,
1365                                 0,
1366                                 vme_context->res_brc_history_buffer.size,
1367                                 0,
1368                                 VP9_BTI_BRC_HISTORY_G9);
1369
1370     gen9_add_buffer_2d_gpe_surface(ctx,
1371                                    gpe_context,
1372                                    &vme_context->s4x_memv_distortion_buffer,
1373                                    1,
1374                                    I965_SURFACEFORMAT_R8_UNORM,
1375                                    VP9_BTI_BRC_DISTORTION_G9);
1376 }
1377
1378 /* The function related with BRC */
1379 static VAStatus
1380 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1381                                struct encode_state *encode_state,
1382                                struct intel_encoder_context *encoder_context)
1383 {
1384     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1385     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1386     struct gpe_media_object_parameter media_object_param;
1387     struct i965_gpe_context *gpe_context;
1388     int gpe_index = VP9_BRC_INIT;
1389     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1390     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1391     VAEncPictureParameterBufferVP9 *pic_param;
1392     struct gen9_vp9_state *vp9_state;
1393
1394     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1395
1396     if (!vp9_state || !vp9_state->pic_param)
1397         return VA_STATUS_ERROR_INVALID_PARAMETER;
1398
1399     pic_param = vp9_state->pic_param;
1400
1401     if (vp9_state->brc_inited)
1402         gpe_index = VP9_BRC_RESET;
1403
1404     gpe_context = &brc_context->gpe_contexts[gpe_index];
1405
1406     gen8_gpe_context_init(ctx, gpe_context);
1407     gen9_gpe_reset_binding_table(ctx, gpe_context);
1408
1409     brc_initreset_curbe.media_state_type    = media_function;
1410     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1411     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1412     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1413     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1414     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1415     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1416     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1417                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1418     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1419                           &vp9_state->brc_init_reset_buf_size_in_bits;
1420     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1421                           &vp9_state->brc_init_reset_input_bits_per_frame;
1422     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1423     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1424     brc_initreset_curbe.mbbrc_enabled      = 0;
1425     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1426     brc_initreset_curbe.frame_rate           = vp9_state->frame_rate;
1427
1428     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1429                                    gpe_context,
1430                                    encoder_context,
1431                                    &brc_initreset_curbe);
1432
1433     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1434     gen8_gpe_setup_interface_data(ctx, gpe_context);
1435
1436     memset(&media_object_param, 0, sizeof(media_object_param));
1437     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1438
1439     return VA_STATUS_SUCCESS;
1440 }
1441
1442 static void
1443 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1444                                      struct encode_state *encode_state,
1445                                      struct intel_encoder_context *encoder_context,
1446                                      struct i965_gpe_context *gpe_context)
1447 {
1448     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1449
1450     struct object_surface *obj_surface;
1451     struct gen9_surface_vp9 *vp9_priv_surface;
1452
1453     /* sScaled4xSurface surface */
1454     obj_surface = encode_state->reconstructed_object;
1455
1456     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1457
1458     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1459     gen9_add_2d_gpe_surface(ctx, gpe_context,
1460                             obj_surface,
1461                             0, 1,
1462                             I965_SURFACEFORMAT_R8_UNORM,
1463                             VP9_BTI_BRC_SRCY4X_G9
1464                             );
1465
1466     gen9_add_adv_gpe_surface(ctx, gpe_context,
1467                              obj_surface,
1468                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1469
1470     gen9_add_buffer_2d_gpe_surface(ctx,
1471                                    gpe_context,
1472                                    &vme_context->s4x_memv_distortion_buffer,
1473                                    1,
1474                                    I965_SURFACEFORMAT_R8_UNORM,
1475                                    VP9_BTI_BRC_DISTORTION_G9);
1476
1477      return;
1478 }
1479
1480 /* The function related with BRC */
1481 static VAStatus
1482 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1483                                struct encode_state *encode_state,
1484                                struct intel_encoder_context *encoder_context)
1485 {
1486     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1487     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1488     struct i965_gpe_context *gpe_context;
1489     int gpe_index = VP9_BRC_INTRA_DIST;
1490     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1491     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1492     VAEncPictureParameterBufferVP9 *pic_param;
1493     struct gen9_vp9_state *vp9_state;
1494     struct gpe_media_object_walker_parameter media_object_walker_param;
1495     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
1496
1497     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1498
1499     if (!vp9_state || !vp9_state->pic_param)
1500         return VA_STATUS_ERROR_INVALID_PARAMETER;
1501
1502     pic_param = vp9_state->pic_param;
1503
1504     gpe_context = &brc_context->gpe_contexts[gpe_index];
1505
1506     gen8_gpe_context_init(ctx, gpe_context);
1507     gen9_gpe_reset_binding_table(ctx, gpe_context);
1508
1509     brc_intra_dist_curbe.media_state_type    = media_function;
1510     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1511     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1512     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1513     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1514     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1515     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1516     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1517                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1518     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1519                           &vp9_state->brc_init_reset_buf_size_in_bits;
1520     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1521                           &vp9_state->brc_init_reset_input_bits_per_frame;
1522     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1523     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1524     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1525     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1526     brc_intra_dist_curbe.frame_rate           = vp9_state->frame_rate;
1527
1528     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1529                                    gpe_context,
1530                                    encoder_context,
1531                                    &brc_intra_dist_curbe);
1532
1533     /* zero distortion buffer */
1534     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1535
1536     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1537     gen8_gpe_setup_interface_data(ctx, gpe_context);
1538
1539     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1540     kernel_walker_param.resolution_x = vme_context->downscaled_width_in_mb4x;
1541     kernel_walker_param.resolution_y = vme_context->downscaled_height_in_mb4x;
1542     kernel_walker_param.no_dependency = 1;
1543
1544     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1545
1546     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1547                                         gpe_context,
1548                                         media_function,
1549                                         &media_object_walker_param);
1550
1551     return VA_STATUS_SUCCESS;
1552 }
1553
1554 static void
1555 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1556                                             struct encode_state *encode_state,
1557                                             struct intel_encoder_context *encoder_context,
1558                                             struct i965_gpe_resource *gpe_resource)
1559 {
1560     struct gen9_vp9_state *vp9_state;
1561     VAEncPictureParameterBufferVP9 *pic_param;
1562     int frame_width_minus1, frame_height_minus1;
1563     int is_lossless = 0;
1564     int is_intra_only = 0;
1565     unsigned int last_frame_type;
1566     unsigned int ref_flags;
1567     unsigned int use_prev_frame_mvs, adapt_flag;
1568     struct gen9_surface_vp9 *vp9_surface = NULL;
1569     struct object_surface *obj_surface = NULL;
1570     uint32_t scale_h = 0;
1571     uint32_t scale_w = 0;
1572
1573     char *pdata;
1574     int i, j;
1575     unsigned int *cmd_ptr, cmd_value, tmp;
1576
1577     pdata = i965_map_gpe_resource(gpe_resource);
1578     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1579
1580     if (!vp9_state || !vp9_state->pic_param || !pdata)
1581         return;
1582
1583     pic_param = vp9_state->pic_param;
1584     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1585     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1586     if ((pic_param->luma_ac_qindex == 0) &&
1587         (pic_param->luma_dc_qindex_delta == 0) &&
1588         (pic_param->chroma_ac_qindex_delta == 0) &&
1589         (pic_param->chroma_dc_qindex_delta == 0))
1590         is_lossless = 1;
1591
1592     if (pic_param->pic_flags.bits.frame_type)
1593         is_intra_only = pic_param->pic_flags.bits.intra_only;
1594
1595     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1596
1597     use_prev_frame_mvs = 0;
1598     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1599         last_frame_type = 0;
1600         ref_flags = 0;
1601     } else {
1602         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1603                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1604                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1605                     );
1606         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1607             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1608             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1609             !pic_param->pic_flags.bits.intra_only &&
1610             vp9_state->vp9_last_frame.show_frame &&
1611             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1612              !vp9_state->vp9_last_frame.intra_only)
1613            )
1614             use_prev_frame_mvs = 1;
1615     }
1616     adapt_flag = 0;
1617     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1618         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1619         adapt_flag = 1;
1620
1621     for (i = 0; i < 4; i++) {
1622         uint32_t non_first_pass;
1623         non_first_pass = 1;
1624         if (i == 0)
1625             non_first_pass = 0;
1626
1627         cmd_ptr =(unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1628
1629         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1630         *cmd_ptr++ = (frame_height_minus1 << 16 |
1631                       frame_width_minus1);
1632         /* dw2 */
1633         *cmd_ptr++ = ( 0 << 31 | /* disable segment_in */
1634                        0 << 30 | /* disable segment_out */
1635                        is_lossless << 29 | /* loseless */
1636                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1637                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1638                        (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1639                        (pic_param->sharpness_level << 23) |
1640                        (pic_param->filter_level << 17) |
1641                        (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1642                        (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1643                        (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1644                        (last_frame_type << 13) |
1645                        (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1646                        (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1647                        (use_prev_frame_mvs) << 10 |
1648                        ref_flags |
1649                        (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1650                        (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1651                        (is_intra_only << 2) |
1652                        (adapt_flag << 1) |
1653                        (pic_param->pic_flags.bits.frame_type) << 0);
1654
1655         *cmd_ptr++ =((0 << 28) | /* VP9Profile0 */
1656                      (0 << 24) | /* 8-bit depth */
1657                      (0 << 22) | /* only 420 format */
1658                      (0 << 0)  | /* sse statistics */
1659                      (pic_param->log2_tile_rows << 8) |
1660                      (pic_param->log2_tile_columns << 0));
1661
1662         /* dw4..6 */
1663         if (pic_param->pic_flags.bits.frame_type &&
1664             !pic_param->pic_flags.bits.intra_only) {
1665             for (j = 0; j < 3; j++) {
1666                 obj_surface = encode_state->reference_objects[j];
1667                 scale_w = 0;
1668                 scale_h = 0;
1669                 if (obj_surface && obj_surface->private_data) {
1670                     vp9_surface = obj_surface->private_data;
1671                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1672                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1673                     *cmd_ptr++ = (scale_w << 16 |
1674                                   scale_h);
1675                 } else
1676                     *cmd_ptr++ = 0;
1677             }
1678         } else {
1679             *cmd_ptr++ = 0;
1680             *cmd_ptr++ = 0;
1681             *cmd_ptr++ = 0;
1682         }
1683         /* dw7..9 */
1684         for(j = 0; j < 3; j++) {
1685             obj_surface = encode_state->reference_objects[j];
1686             vp9_surface = NULL;
1687
1688             if (obj_surface && obj_surface->private_data) {
1689                 vp9_surface = obj_surface->private_data;
1690                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1691                              (vp9_surface->frame_width - 1);
1692             } else
1693                 *cmd_ptr++ = 0;
1694         }
1695         /* dw10 */
1696         *cmd_ptr++ = 0;
1697         /* dw11 */
1698         *cmd_ptr++ = (1 << 1);
1699         *cmd_ptr++ = 0;
1700
1701         /* dw13 */
1702         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1703                       (0 << 24) | /* tail insertation */
1704                       (pic_param->luma_ac_qindex << 16) |
1705                       0 /* compressed header bin count */);
1706
1707         /* dw14 */
1708         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1709         cmd_value = (tmp << 16);
1710         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1711         cmd_value |= (tmp << 8);
1712         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1713         cmd_value |= tmp;
1714         *cmd_ptr++ = cmd_value;
1715
1716         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1717         cmd_value = tmp;
1718         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1719         cmd_value |= (tmp << 8);
1720         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1721         cmd_value |= (tmp << 16);
1722         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1723         cmd_value |= (tmp << 24);
1724         *cmd_ptr++ = cmd_value;
1725
1726         /* dw16 */
1727         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1728         cmd_value = tmp;
1729         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1730         cmd_value |= (tmp << 8);
1731         *cmd_ptr++ = cmd_value;
1732
1733         /* dw17 */
1734         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1735                       (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1736         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1737                       (vp9_state->frame_header.bit_offset_lf_level << 16);
1738
1739         /* dw19 */
1740         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1741                       non_first_pass << 16);
1742         /* dw20 */
1743         *cmd_ptr++ = (1 << 31) | (256);
1744
1745         /* dw21 */
1746         *cmd_ptr++ = (0 << 31) | 1;
1747
1748         /* dw22-dw24. Frame_delta_qindex_range */
1749         *cmd_ptr++ = 0;
1750         *cmd_ptr++ = 0;
1751         *cmd_ptr++ = 0;
1752
1753         /* dw25-26. frame_delta_lf_range */
1754         *cmd_ptr++ = 0;
1755         *cmd_ptr++ = 0;
1756
1757         /* dw27. frame_delta_lf_min */
1758         *cmd_ptr++ = 0;
1759
1760         /* dw28..30 */
1761         *cmd_ptr++ = 0;
1762         *cmd_ptr++ = 0;
1763         *cmd_ptr++ = 0;
1764
1765         /* dw31 */
1766         *cmd_ptr++ = (0 << 30) | 1;
1767         /* dw32 */
1768         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1769
1770         *cmd_ptr++ = 0;
1771         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1772     }
1773
1774     i965_unmap_gpe_resource(gpe_resource);
1775 }
1776
1777 static void
1778 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1779                                      struct encode_state *encode_state,
1780                                      struct intel_encoder_context *encoder_context,
1781                                      struct i965_gpe_context *brc_gpe_context,
1782                                      struct i965_gpe_context *mbenc_gpe_context)
1783 {
1784     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1785
1786     /* 0. BRC history buffer */
1787     gen9_add_buffer_gpe_surface(ctx,
1788                                 brc_gpe_context,
1789                                 &vme_context->res_brc_history_buffer,
1790                                 0,
1791                                 vme_context->res_brc_history_buffer.size,
1792                                 0,
1793                                 VP9_BTI_BRC_HISTORY_G9);
1794
1795     /* 1. Constant data buffer */
1796     gen9_add_buffer_gpe_surface(ctx,
1797                                 brc_gpe_context,
1798                                 &vme_context->res_brc_const_data_buffer,
1799                                 0,
1800                                 vme_context->res_brc_const_data_buffer.size,
1801                                 0,
1802                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1803
1804     /* 2. Distortion 2D surface buffer */
1805     gen9_add_buffer_2d_gpe_surface(ctx,
1806                                    brc_gpe_context,
1807                                    &vme_context->s4x_memv_distortion_buffer,
1808                                    1,
1809                                    I965_SURFACEFORMAT_R8_UNORM,
1810                                    VP9_BTI_BRC_DISTORTION_G9);
1811
1812     /* 3. pak buffer */
1813     gen9_add_buffer_gpe_surface(ctx,
1814                                 brc_gpe_context,
1815                                 &vme_context->res_brc_mmdk_pak_buffer,
1816                                 0,
1817                                 vme_context->res_brc_mmdk_pak_buffer.size,
1818                                 0,
1819                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1820     /* 4. Mbenc curbe input buffer */
1821     gen9_add_dri_buffer_gpe_surface(ctx,
1822                                     brc_gpe_context,
1823                                     mbenc_gpe_context->dynamic_state.bo,
1824                                     0,
1825                                     ALIGN(mbenc_gpe_context->curbe_size, 64),
1826                                     mbenc_gpe_context->curbe_offset,
1827                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1828     /* 5. Mbenc curbe output buffer */
1829     gen9_add_dri_buffer_gpe_surface(ctx,
1830                                     brc_gpe_context,
1831                                     mbenc_gpe_context->dynamic_state.bo,
1832                                     0,
1833                                     ALIGN(mbenc_gpe_context->curbe_size, 64),
1834                                     mbenc_gpe_context->curbe_offset,
1835                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1836
1837     /* 6. BRC_PIC_STATE read buffer */
1838     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1839                                 &vme_context->res_pic_state_brc_read_buffer,
1840                                 0,
1841                                 vme_context->res_pic_state_brc_read_buffer.size,
1842                                 0,
1843                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1844
1845     /* 7. BRC_PIC_STATE write buffer */
1846     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1847                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1848                                 0,
1849                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1850                                 0,
1851                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1852
1853     /* 8. SEGMENT_STATE read buffer */
1854     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1855                                 &vme_context->res_seg_state_brc_read_buffer,
1856                                 0,
1857                                 vme_context->res_seg_state_brc_read_buffer.size,
1858                                 0,
1859                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1860
1861     /* 9. SEGMENT_STATE write buffer */
1862     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1863                                 &vme_context->res_seg_state_brc_write_buffer,
1864                                 0,
1865                                 vme_context->res_seg_state_brc_write_buffer.size,
1866                                 0,
1867                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1868
1869     /* 10. Bitstream size buffer */
1870     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1871                                 &vme_context->res_brc_bitstream_size_buffer,
1872                                 0,
1873                                 vme_context->res_brc_bitstream_size_buffer.size,
1874                                 0,
1875                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1876
1877     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1878                                 &vme_context->res_brc_hfw_data_buffer,
1879                                 0,
1880                                 vme_context->res_brc_hfw_data_buffer.size,
1881                                 0,
1882                                 VP9_BTI_BRC_HFW_DATA_G9);
1883
1884      return;
1885 }
1886
1887 static VAStatus
1888 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1889                                struct encode_state *encode_state,
1890                                struct intel_encoder_context *encoder_context)
1891 {
1892     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1893     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1894     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1895     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1896     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1897     int mbenc_function;
1898     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1899     VAEncPictureParameterBufferVP9 *pic_param;
1900     struct gen9_vp9_state *vp9_state;
1901     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1902     struct gpe_media_object_parameter media_object_param;
1903
1904     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1905     if (!vp9_state || !vp9_state->pic_param)
1906         return VA_STATUS_ERROR_INVALID_PARAMETER;
1907
1908     pic_param = vp9_state->pic_param;
1909     // Setup VP9 MbEnc Curbe
1910     if (vp9_state->picture_coding_type) {
1911         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1912         mbenc_index = VP9_MBENC_IDX_INTER;
1913     } else {
1914         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1915         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1916     }
1917
1918     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1919
1920     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1921
1922     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1923     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1924     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1925     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1926     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1927     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1928     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1929     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1930     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1931     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1932     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1933     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1934     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1935     mbenc_curbe_param.media_state_type       = mbenc_function;
1936
1937     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1938                                 mbenc_gpe_context,
1939                                 encoder_context,
1940                                 &mbenc_curbe_param);
1941
1942     vp9_state->mbenc_curbe_set_in_brc_update = true;
1943
1944     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1945
1946     gen8_gpe_context_init(ctx, brc_gpe_context);
1947     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1948
1949     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1950
1951     // Setup BRC Update Curbe
1952     brc_update_curbe_param.media_state_type       = media_function;
1953     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1954     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1955     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1956     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1957     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1958     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1959     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1960     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1961     brc_update_curbe_param.b_used_ref             = 1;
1962     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1963     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1964     brc_update_curbe_param.mbbrc_enabled          = 0;
1965     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1966     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1967
1968     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1969                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1970     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1971                           &vp9_state->brc_init_reset_buf_size_in_bits;
1972     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1973                           &vp9_state->brc_init_reset_input_bits_per_frame;
1974
1975     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1976                                    brc_gpe_context,
1977                                    encoder_context,
1978                                    &brc_update_curbe_param);
1979
1980
1981     // Check if the constant data surface is present
1982     if (vp9_state->brc_constant_buffer_supported)
1983     {
1984         char *brc_const_buffer;
1985         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1986
1987         if (!brc_const_buffer)
1988             return VA_STATUS_ERROR_OPERATION_FAILED;
1989
1990         if (vp9_state->picture_coding_type)
1991             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1992                    sizeof(vp9_brc_const_data_p_g9));
1993         else
1994             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1995                    sizeof(vp9_brc_const_data_i_g9));
1996
1997         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1998     }
1999
2000     if (pic_param->pic_flags.bits.segmentation_enabled)
2001     {
2002           //reallocate the vme_state->mb_segment_map_surface
2003           /* this will be added later */
2004     }
2005
2006     {
2007         pic_param->filter_level = 0;
2008         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
2009         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
2010                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
2011     }
2012
2013     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
2014                                      encoder_context,
2015                                      brc_gpe_context,
2016                                      mbenc_gpe_context);
2017
2018     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
2019     memset(&media_object_param, 0, sizeof(media_object_param));
2020     gen9_run_kernel_media_object(ctx, encoder_context,
2021                                  brc_gpe_context,
2022                                  media_function,
2023                                  &media_object_param);
2024     return VA_STATUS_SUCCESS;
2025 }
2026
2027 static
2028 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
2029                             struct encode_state *encode_state,
2030                             struct i965_gpe_context *gpe_context,
2031                             struct intel_encoder_context *encoder_context,
2032                             struct gen9_vp9_me_curbe_param *param)
2033 {
2034     vp9_me_curbe_data        *me_cmd;
2035     int enc_media_state;
2036     int                                       me_mode;
2037     unsigned int                                       width, height;
2038     uint32_t                                  l0_ref_frames;
2039     uint32_t                                  scale_factor;
2040
2041     if (param->b16xme_enabled) {
2042         if (param->use_16x_me)
2043             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
2044         else
2045             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
2046     } else {
2047         me_mode = VP9_ENC_ME4X_ONLY;
2048     }
2049
2050     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
2051         scale_factor = 16;
2052     else
2053         scale_factor = 4;
2054
2055     if (param->use_16x_me)
2056         enc_media_state = VP9_MEDIA_STATE_16X_ME;
2057     else
2058         enc_media_state = VP9_MEDIA_STATE_4X_ME;
2059
2060     me_cmd = gen8p_gpe_context_map_curbe(gpe_context);
2061
2062     if (!me_cmd)
2063         return;
2064
2065     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
2066
2067     me_cmd->dw1.max_num_mvs           = 0x10;
2068     me_cmd->dw1.bi_weight             = 0x00;
2069
2070     me_cmd->dw2.max_num_su            = 0x39;
2071     me_cmd->dw2.max_len_sp            = 0x39;
2072
2073     me_cmd->dw3.sub_mb_part_mask       = 0x77;
2074     me_cmd->dw3.inter_sad             = 0x00;
2075     me_cmd->dw3.intra_sad            = 0x00;
2076     me_cmd->dw3.bme_disable_fbr      = 0x01;
2077     me_cmd->dw3.sub_pel_mode         = 0x03;
2078
2079     width = param->frame_width / scale_factor;
2080     height = param->frame_height / scale_factor;
2081
2082     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
2083     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
2084
2085     me_cmd->dw5.ref_width            = 0x30;
2086     me_cmd->dw5.ref_height           = 0x28;
2087
2088     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
2089         me_cmd->dw6.write_distortions = 0x01;
2090
2091     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
2092     me_cmd->dw6.super_combine_dist    = 0x5;
2093     me_cmd->dw6.max_vmvr              = 0x7fc;
2094
2095     l0_ref_frames = (param->ref_frame_flag & 0x01) +
2096                     !!(param->ref_frame_flag & 0x02) +
2097                     !!(param->ref_frame_flag & 0x04);
2098     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
2099     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
2100
2101     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
2102     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
2103
2104     me_cmd->dw15.mv_shift_factor        = 0x02;
2105
2106     {
2107         memcpy((void *)((char *)me_cmd + 64),
2108                vp9_diamond_ime_search_path_delta,
2109                sizeof(vp9_diamond_ime_search_path_delta));
2110     }
2111
2112
2113     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
2114     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
2115     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
2116     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
2117     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
2118     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
2119
2120     gen8p_gpe_context_unmap_curbe(gpe_context);
2121 }
2122
2123 static void
2124 gen9_vp9_send_me_surface(VADriverContextP ctx,
2125                          struct encode_state *encode_state,
2126                          struct i965_gpe_context *gpe_context,
2127                          struct intel_encoder_context *encoder_context,
2128                          struct gen9_vp9_me_surface_param *param)
2129 {
2130     struct i965_driver_data *i965 = i965_driver_data(ctx);
2131     struct object_surface *obj_surface;
2132     struct gen9_surface_vp9 *vp9_priv_surface;
2133     struct object_surface *input_surface;
2134     struct i965_gpe_resource *gpe_resource;
2135     int ref_bti;
2136
2137     obj_surface = SURFACE(param->curr_pic);
2138
2139     if (!obj_surface || !obj_surface->private_data)
2140         return;
2141
2142     vp9_priv_surface = obj_surface->private_data;
2143     if (param->use_16x_me)
2144     {
2145         gpe_resource = param->pres_16x_memv_data_buffer;
2146     }
2147     else
2148     {
2149         gpe_resource = param->pres_4x_memv_data_buffer;
2150     }
2151
2152     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2153                                    gpe_resource,
2154                                    1,
2155                                    I965_SURFACEFORMAT_R8_UNORM,
2156                                    VP9_BTI_ME_MV_DATA_SURFACE);
2157
2158     if (param->b16xme_enabled) {
2159         gpe_resource = param->pres_16x_memv_data_buffer;
2160         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2161                                        gpe_resource,
2162                                        1,
2163                                        I965_SURFACEFORMAT_R8_UNORM,
2164                                        VP9_BTI_16XME_MV_DATA_SURFACE);
2165     }
2166
2167     if (!param->use_16x_me) {
2168         gpe_resource = param->pres_me_brc_distortion_buffer;
2169
2170         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2171                                        gpe_resource,
2172                                        1,
2173                                        I965_SURFACEFORMAT_R8_UNORM,
2174                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2175
2176         gpe_resource = param->pres_me_distortion_buffer;
2177
2178         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2179                                        gpe_resource,
2180                                        1,
2181                                        I965_SURFACEFORMAT_R8_UNORM,
2182                                        VP9_BTI_ME_DISTORTION_SURFACE);
2183     }
2184
2185     if (param->use_16x_me)
2186         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2187     else
2188         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2189
2190     gen9_add_adv_gpe_surface(ctx, gpe_context,
2191                              input_surface,
2192                              VP9_BTI_ME_CURR_PIC_L0);
2193
2194     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2195
2196
2197     if (param->last_ref_pic) {
2198         obj_surface = param->last_ref_pic;
2199         vp9_priv_surface = obj_surface->private_data;
2200
2201         if (param->use_16x_me)
2202             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2203         else
2204             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2205
2206         if (param->dys_enabled &&
2207             ((vp9_priv_surface->frame_width != param->frame_width) ||
2208              (vp9_priv_surface->frame_height != param->frame_height))) {
2209             if (param->use_16x_me)
2210                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2211             else
2212                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2213         }
2214         gen9_add_adv_gpe_surface(ctx, gpe_context,
2215                                  input_surface,
2216                                  ref_bti);
2217         gen9_add_adv_gpe_surface(ctx, gpe_context,
2218                                  input_surface,
2219                                  ref_bti + 1);
2220         ref_bti += 2;
2221     }
2222
2223     if (param->golden_ref_pic) {
2224         obj_surface = param->golden_ref_pic;
2225         vp9_priv_surface = obj_surface->private_data;
2226
2227         if (param->use_16x_me)
2228             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2229         else
2230             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2231
2232         if (param->dys_enabled &&
2233             ((vp9_priv_surface->frame_width != param->frame_width) ||
2234              (vp9_priv_surface->frame_height != param->frame_height))) {
2235             if (param->use_16x_me)
2236                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2237             else
2238                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2239         }
2240
2241         gen9_add_adv_gpe_surface(ctx, gpe_context,
2242                                  input_surface,
2243                                  ref_bti);
2244         gen9_add_adv_gpe_surface(ctx, gpe_context,
2245                                  input_surface,
2246                                  ref_bti + 1);
2247         ref_bti += 2;
2248     }
2249
2250     if (param->alt_ref_pic) {
2251         obj_surface = param->alt_ref_pic;
2252         vp9_priv_surface = obj_surface->private_data;
2253
2254         if (param->use_16x_me)
2255             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2256         else
2257             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2258
2259         if (param->dys_enabled &&
2260             ((vp9_priv_surface->frame_width != param->frame_width) ||
2261              (vp9_priv_surface->frame_height != param->frame_height))) {
2262             if (param->use_16x_me)
2263                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2264             else
2265                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2266         }
2267         gen9_add_adv_gpe_surface(ctx, gpe_context,
2268                                  input_surface,
2269                                  ref_bti);
2270         gen9_add_adv_gpe_surface(ctx, gpe_context,
2271                                  input_surface,
2272                                  ref_bti + 1);
2273         ref_bti += 2;
2274     }
2275
2276     return;
2277 }
2278
2279 static
2280 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2281                               struct encode_state *encode_state,
2282                               struct intel_encoder_context *encoder_context,
2283                               struct i965_gpe_context *gpe_context,
2284                               int use_16x_me)
2285 {
2286     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2287     struct gen9_vp9_me_surface_param  me_surface_param;
2288     struct gen9_vp9_state *vp9_state;
2289
2290     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2291
2292     /* sScaled4xSurface surface */
2293     memset(&me_surface_param, 0, sizeof(me_surface_param));
2294     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2295     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2296     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2297     me_surface_param.curr_pic = vp9_state->curr_frame;
2298     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2299     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2300     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2301     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2302
2303     if (use_16x_me) {
2304         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2305         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2306     } else {
2307         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2308         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2309     }
2310     me_surface_param.frame_width  = vp9_state->frame_width;
2311     me_surface_param.frame_height  = vp9_state->frame_height;
2312
2313     me_surface_param.use_16x_me = use_16x_me;
2314     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2315     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2316
2317     vme_context->pfn_send_me_surface(ctx, encode_state,
2318                                      gpe_context,
2319                                      encoder_context,
2320                                      &me_surface_param);
2321     return;
2322 }
2323
2324 static VAStatus
2325 gen9_vp9_me_kernel(VADriverContextP ctx,
2326                    struct encode_state *encode_state,
2327                    struct intel_encoder_context *encoder_context,
2328                    int use_16x_me)
2329 {
2330     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2331     struct i965_gpe_context *gpe_context;
2332     int media_function;
2333     struct gen9_vp9_me_curbe_param me_curbe_param;
2334     struct gen9_vp9_state *vp9_state;
2335     struct gpe_media_object_walker_parameter media_object_walker_param;
2336     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2337
2338     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2339     if (!vp9_state || !vp9_state->pic_param)
2340         return VA_STATUS_ERROR_INVALID_PARAMETER;
2341
2342     if (use_16x_me)
2343         media_function = VP9_MEDIA_STATE_16X_ME;
2344     else
2345         media_function = VP9_MEDIA_STATE_4X_ME;
2346
2347     gpe_context = &(vme_context->me_context.gpe_context);
2348
2349     gen8_gpe_context_init(ctx, gpe_context);
2350     gen9_gpe_reset_binding_table(ctx, gpe_context);
2351
2352     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2353     me_curbe_param.ppic_param = vp9_state->pic_param;
2354     me_curbe_param.pseq_param = vp9_state->seq_param;
2355     me_curbe_param.frame_width = vp9_state->frame_width;
2356     me_curbe_param.frame_height = vp9_state->frame_height;
2357     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2358     me_curbe_param.use_16x_me = use_16x_me;
2359     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2360     vme_context->pfn_set_curbe_me(ctx, encode_state,
2361                                   gpe_context,
2362                                   encoder_context,
2363                                   &me_curbe_param);
2364
2365     gen9_me_add_surfaces_vp9(ctx, encode_state,
2366                              encoder_context,
2367                              gpe_context,
2368                              use_16x_me);
2369
2370     gen8_gpe_setup_interface_data(ctx, gpe_context);
2371
2372     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2373     if (use_16x_me) {
2374         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2375         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2376     } else {
2377         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2378         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2379     }
2380     kernel_walker_param.no_dependency = 1;
2381
2382     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2383
2384     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2385                                         gpe_context,
2386                                         media_function,
2387                                         &media_object_walker_param);
2388
2389     return VA_STATUS_SUCCESS;
2390 }
2391
2392 static void
2393 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2394                             struct encode_state *encode_state,
2395                             struct i965_gpe_context *gpe_context,
2396                             struct intel_encoder_context *encoder_context,
2397                             struct gen9_vp9_scaling_curbe_param *curbe_param)
2398 {
2399     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2400
2401     curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
2402
2403     if (!curbe_cmd)
2404         return;
2405
2406     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2407
2408     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2409     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2410
2411     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2412     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2413
2414
2415     curbe_cmd->dw6.enable_mb_variance_output = 0;
2416     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2417     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2418
2419     if (curbe_param->mb_variance_output_enabled ||
2420         curbe_param->mb_pixel_average_output_enabled)
2421     {
2422         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2423     }
2424
2425     gen8p_gpe_context_unmap_curbe(gpe_context);
2426     return;
2427 }
2428
2429 static void
2430 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2431                               struct encode_state *encode_state,
2432                               struct i965_gpe_context *gpe_context,
2433                               struct intel_encoder_context *encoder_context,
2434                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2435 {
2436     vp9_bti_scaling_offset *scaling_bti;
2437     unsigned int surface_format;
2438
2439     scaling_bti = scaling_surface_param->p_scaling_bti;
2440
2441     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2442         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2443     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2444         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2445     else
2446         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2447
2448     gen9_add_2d_gpe_surface(ctx, gpe_context,
2449                             scaling_surface_param->input_surface,
2450                             0, 1, surface_format,
2451                             scaling_bti->scaling_frame_src_y);
2452
2453     gen9_add_2d_gpe_surface(ctx, gpe_context,
2454                             scaling_surface_param->output_surface,
2455                             0, 1, surface_format,
2456                             scaling_bti->scaling_frame_dst_y);
2457
2458
2459     return;
2460 }
2461
2462 static VAStatus
2463 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2464                         struct encode_state *encode_state,
2465                         struct intel_encoder_context *encoder_context,
2466                         int use_16x_scaling)
2467 {
2468     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2469     struct i965_gpe_context *gpe_context;
2470     int media_function;
2471     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2472     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2473     struct gen9_vp9_state *vp9_state;
2474     VAEncPictureParameterBufferVP9  *pic_param;
2475     struct gpe_media_object_walker_parameter media_object_walker_param;
2476     struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2477     struct object_surface *obj_surface;
2478     struct object_surface *input_surface, *output_surface;
2479     struct gen9_surface_vp9 *vp9_priv_surface;
2480     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2481     unsigned int input_frame_width, input_frame_height;
2482     unsigned int output_frame_width, output_frame_height;
2483
2484     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2485     if (!vp9_state || !vp9_state->pic_param)
2486         return VA_STATUS_ERROR_INVALID_PARAMETER;
2487
2488     pic_param = vp9_state->pic_param;
2489
2490     if (use_16x_scaling)
2491         media_function = VP9_MEDIA_STATE_16X_SCALING;
2492     else
2493         media_function = VP9_MEDIA_STATE_4X_SCALING;
2494
2495     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2496
2497     gen8_gpe_context_init(ctx, gpe_context);
2498     gen9_gpe_reset_binding_table(ctx, gpe_context);
2499
2500     obj_surface = encode_state->reconstructed_object;
2501     vp9_priv_surface = obj_surface->private_data;
2502
2503     if (use_16x_scaling)
2504     {
2505         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2506         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2507
2508         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2509         input_frame_width           = vp9_state->frame_width_4x;
2510         input_frame_height          = vp9_state->frame_height_4x;
2511
2512         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2513         output_frame_width          = vp9_state->frame_width_16x;
2514         output_frame_height         = vp9_state->frame_height_16x;
2515     } else {
2516         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2517         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2518
2519         if (vp9_state->dys_in_use &&
2520                ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2521                 (pic_param->frame_height_src != pic_param->frame_height_dst)))
2522             input_surface               = vp9_priv_surface->dys_surface_obj;
2523         else
2524             input_surface               = encode_state->input_yuv_object;
2525
2526         input_frame_width           = vp9_state->frame_width;
2527         input_frame_height          = vp9_state->frame_height;
2528
2529         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2530         output_frame_width          = vp9_state->frame_width_4x;
2531         output_frame_height         = vp9_state->frame_height_4x;
2532     }
2533
2534     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2535
2536     scaling_curbe_param.input_picture_width  = input_frame_width;
2537     scaling_curbe_param.input_picture_height = input_frame_height;
2538
2539     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2540     scaling_curbe_param.use_32x_scaling = 0;
2541
2542     if (use_16x_scaling)
2543         scaling_curbe_param.mb_variance_output_enabled = 0;
2544     else
2545         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2546
2547     scaling_curbe_param.blk8x8_stat_enabled = 0;
2548
2549     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2550                                   gpe_context,
2551                                   encoder_context,
2552                                   &scaling_curbe_param);
2553
2554     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2555     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2556     scaling_surface_param.input_surface                      = input_surface;
2557     scaling_surface_param.input_frame_width                  = input_frame_width;
2558     scaling_surface_param.input_frame_height                 = input_frame_height;
2559
2560     scaling_surface_param.output_surface                     = output_surface;
2561     scaling_surface_param.output_frame_width                 = output_frame_width;
2562     scaling_surface_param.output_frame_height                = output_frame_height;
2563     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2564     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2565
2566     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2567                                           gpe_context,
2568                                           encoder_context,
2569                                           &scaling_surface_param);
2570
2571     gen8_gpe_setup_interface_data(ctx, gpe_context);
2572
2573     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2574     /* the scaling is based on 8x8 blk level */
2575     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2576     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2577     kernel_walker_param.no_dependency = 1;
2578
2579     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2580
2581     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2582                                         gpe_context,
2583                                         media_function,
2584                                         &media_object_walker_param);
2585
2586     return VA_STATUS_SUCCESS;
2587 }
2588
2589 static void
2590 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2591 {
2592     struct gen9_sampler_8x8_avs                *sampler_cmd;
2593
2594     if (!gpe_context)
2595         return;
2596
2597     dri_bo_map(gpe_context->dynamic_state.bo, 1);
2598
2599     if (!gpe_context->dynamic_state.bo->virtual)
2600         return;
2601
2602     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2603        (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
2604
2605     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2606
2607     sampler_cmd->dw0.r3c_coefficient                      = 15;
2608     sampler_cmd->dw0.r3x_coefficient                      = 6;
2609     sampler_cmd->dw0.strong_edge_threshold                = 8;
2610     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2611     sampler_cmd->dw0.gain_factor                          = 32;
2612
2613     sampler_cmd->dw2.r5c_coefficient                     = 3;
2614     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2615     sampler_cmd->dw2.r5x_coefficient                     = 9;
2616     sampler_cmd->dw2.strong_edge_weight                  = 6;
2617     sampler_cmd->dw2.regular_weight                      = 3;
2618     sampler_cmd->dw2.non_edge_weight                     = 2;
2619     sampler_cmd->dw2.global_noise_estimation             = 255;
2620
2621     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2622     sampler_cmd->dw3.cos_alpha                           = 79;
2623     sampler_cmd->dw3.sin_alpha                           = 101;
2624
2625     sampler_cmd->dw5.diamond_du                           = 0;
2626     sampler_cmd->dw5.hs_margin                            = 3;
2627     sampler_cmd->dw5.diamond_alpha                        = 100;
2628
2629     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2630
2631     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2632
2633     sampler_cmd->dw10.y_slope2                            = 24;
2634     sampler_cmd->dw10.s0l                                 = 1792;
2635
2636     sampler_cmd->dw12.y_slope1                            = 24;
2637
2638     sampler_cmd->dw14.s0u                                = 256;
2639
2640     sampler_cmd->dw15.s2u                                = 1792;
2641     sampler_cmd->dw15.s1u                                = 0;
2642
2643     memcpy(sampler_cmd->coefficients,
2644            &gen9_vp9_avs_coeffs[0],
2645            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2646
2647     sampler_cmd->dw152.default_sharpness_level     = 255;
2648     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2649     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2650     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2651     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2652
2653     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2654     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2655     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2656
2657     memcpy(sampler_cmd->extra_coefficients,
2658            &gen9_vp9_avs_coeffs[17 * 8],
2659            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2660
2661     dri_bo_unmap(gpe_context->dynamic_state.bo);
2662 }
2663
2664 static void
2665 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2666                        struct encode_state *encode_state,
2667                        struct i965_gpe_context *gpe_context,
2668                        struct intel_encoder_context *encoder_context,
2669                        struct gen9_vp9_dys_curbe_param *curbe_param)
2670 {
2671     vp9_dys_curbe_data  *curbe_cmd;
2672
2673     curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
2674
2675     if (!curbe_cmd)
2676         return;
2677
2678     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2679
2680     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2681     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2682
2683     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2684     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2685
2686     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2687     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2688
2689     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2690     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2691     curbe_cmd->dw18.avs_sample_idx            = 0;
2692
2693     gen8p_gpe_context_unmap_curbe(gpe_context);
2694 }
2695
2696 static void
2697 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2698                        struct encode_state *encode_state,
2699                        struct i965_gpe_context *gpe_context,
2700                        struct intel_encoder_context *encoder_context,
2701                        struct gen9_vp9_dys_surface_param *surface_param)
2702 {
2703
2704     if (surface_param->input_frame)
2705         gen9_add_adv_gpe_surface(ctx,
2706                                  gpe_context,
2707                                  surface_param->input_frame,
2708                                  VP9_BTI_DYS_INPUT_NV12);
2709
2710     if (surface_param->output_frame) {
2711         gen9_add_2d_gpe_surface(ctx,
2712                                 gpe_context,
2713                                 surface_param->output_frame,
2714                                 0,
2715                                 1,
2716                                 I965_SURFACEFORMAT_R8_UNORM,
2717                                 VP9_BTI_DYS_OUTPUT_Y);
2718
2719         gen9_add_2d_gpe_surface(ctx,
2720                                 gpe_context,
2721                                 surface_param->output_frame,
2722                                 1,
2723                                 1,
2724                                 I965_SURFACEFORMAT_R16_UINT,
2725                                 VP9_BTI_DYS_OUTPUT_UV);
2726     }
2727
2728     return;
2729 }
2730
2731 static VAStatus
2732 gen9_vp9_dys_kernel(VADriverContextP ctx,
2733                         struct encode_state *encode_state,
2734                         struct intel_encoder_context *encoder_context,
2735                         gen9_vp9_dys_kernel_param *dys_kernel_param)
2736 {
2737     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2738     struct i965_gpe_context *gpe_context;
2739     int media_function;
2740     struct gen9_vp9_dys_curbe_param                 curbe_param;
2741     struct gen9_vp9_dys_surface_param               surface_param;
2742     struct gpe_media_object_walker_parameter        media_object_walker_param;
2743     struct vp9_encoder_kernel_walker_parameter      kernel_walker_param;
2744     unsigned int                                    resolution_x, resolution_y;
2745
2746     media_function = VP9_MEDIA_STATE_DYS;
2747     gpe_context = &vme_context->dys_context.gpe_context;
2748
2749     //gen8_gpe_context_init(ctx, gpe_context);
2750     gen9_gpe_reset_binding_table(ctx, gpe_context);
2751
2752     /* sampler state is configured only when initializing the GPE context */
2753
2754     memset(&curbe_param, 0, sizeof(curbe_param));
2755     curbe_param.input_width   = dys_kernel_param->input_width;
2756     curbe_param.input_height  = dys_kernel_param->input_height;
2757     curbe_param.output_width = dys_kernel_param->output_width;
2758     curbe_param.output_height = dys_kernel_param->output_height;
2759     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2760                                   gpe_context,
2761                                   encoder_context,
2762                                   &curbe_param);
2763
2764     // Add surface states
2765     memset(&surface_param, 0, sizeof(surface_param));
2766     surface_param.input_frame = dys_kernel_param->input_surface;
2767     surface_param.output_frame = dys_kernel_param->output_surface;
2768     surface_param.vert_line_stride = 0;
2769     surface_param.vert_line_stride_offset = 0;
2770
2771     vme_context->pfn_send_dys_surface(ctx,
2772                                       encode_state,
2773                                       gpe_context,
2774                                       encoder_context,
2775                                       &surface_param);
2776
2777     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2778     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2779
2780     gen8_gpe_setup_interface_data(ctx, gpe_context);
2781
2782     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2783     kernel_walker_param.resolution_x = resolution_x;
2784     kernel_walker_param.resolution_y = resolution_y;
2785     kernel_walker_param.no_dependency = 1;
2786
2787     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2788
2789     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2790                                         gpe_context,
2791                                         media_function,
2792                                         &media_object_walker_param);
2793
2794     return VA_STATUS_SUCCESS;
2795 }
2796
2797 static VAStatus
2798 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2799                           struct encode_state *encode_state,
2800                           struct intel_encoder_context *encoder_context)
2801 {
2802     struct gen9_vp9_state *vp9_state;
2803     VAEncPictureParameterBufferVP9  *pic_param;
2804     gen9_vp9_dys_kernel_param dys_kernel_param;
2805     struct object_surface *obj_surface;
2806     struct object_surface *input_surface, *output_surface;
2807     struct gen9_surface_vp9 *vp9_priv_surface;
2808
2809     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2810
2811     if (!vp9_state || !vp9_state->pic_param)
2812         return VA_STATUS_ERROR_INVALID_PARAMETER;
2813
2814     pic_param = vp9_state->pic_param;
2815
2816     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2817         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2818         input_surface = encode_state->input_yuv_object;
2819         obj_surface = encode_state->reconstructed_object;
2820         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2821         output_surface = vp9_priv_surface->dys_surface_obj;
2822
2823         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2824         dys_kernel_param.input_width = pic_param->frame_width_src;
2825         dys_kernel_param.input_height = pic_param->frame_height_src;
2826         dys_kernel_param.input_surface = input_surface;
2827         dys_kernel_param.output_width = pic_param->frame_width_dst;
2828         dys_kernel_param.output_height = pic_param->frame_height_dst;
2829         dys_kernel_param.output_surface = output_surface;
2830         gen9_vp9_dys_kernel(ctx, encode_state,
2831                             encoder_context,
2832                             &dys_kernel_param);
2833     }
2834
2835     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2836          vp9_state->last_ref_obj) {
2837         obj_surface = vp9_state->last_ref_obj;
2838         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2839
2840         input_surface = obj_surface;
2841         output_surface = vp9_priv_surface->dys_surface_obj;
2842
2843         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2844         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2845         dys_kernel_param.input_surface = input_surface;
2846
2847         dys_kernel_param.output_width = pic_param->frame_width_dst;
2848         dys_kernel_param.output_height = pic_param->frame_height_dst;
2849         dys_kernel_param.output_surface = output_surface;
2850
2851         gen9_vp9_dys_kernel(ctx, encode_state,
2852                             encoder_context,
2853                             &dys_kernel_param);
2854
2855         if (vp9_state->hme_enabled) {
2856             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2857             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2858             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2859
2860             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2861             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2862             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2863
2864             gen9_vp9_dys_kernel(ctx, encode_state,
2865                                 encoder_context,
2866                                 &dys_kernel_param);
2867
2868             /* Does it really need to do the 16x HME if the
2869              * resolution is different?
2870              * Maybe it should be restricted
2871              */
2872             if (vp9_state->b16xme_enabled) {
2873                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2874                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2875                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2876
2877                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2878                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2879                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2880
2881                 gen9_vp9_dys_kernel(ctx, encode_state,
2882                                     encoder_context,
2883                                     &dys_kernel_param);
2884             }
2885         }
2886     }
2887
2888     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2889          vp9_state->golden_ref_obj) {
2890         obj_surface = vp9_state->golden_ref_obj;
2891         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2892
2893         input_surface = obj_surface;
2894         output_surface = vp9_priv_surface->dys_surface_obj;
2895
2896         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2897         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2898         dys_kernel_param.input_surface = input_surface;
2899
2900         dys_kernel_param.output_width = pic_param->frame_width_dst;
2901         dys_kernel_param.output_height = pic_param->frame_height_dst;
2902         dys_kernel_param.output_surface = output_surface;
2903
2904         gen9_vp9_dys_kernel(ctx, encode_state,
2905                             encoder_context,
2906                             &dys_kernel_param);
2907
2908         if (vp9_state->hme_enabled) {
2909             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2910             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2911             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2912
2913             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2914             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2915             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2916
2917             gen9_vp9_dys_kernel(ctx, encode_state,
2918                                 encoder_context,
2919                                 &dys_kernel_param);
2920
2921             /* Does it really need to do the 16x HME if the
2922              * resolution is different?
2923              * Maybe it should be restricted
2924              */
2925             if (vp9_state->b16xme_enabled) {
2926                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2927                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2928                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2929
2930                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2931                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2932                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2933
2934                 gen9_vp9_dys_kernel(ctx, encode_state,
2935                                     encoder_context,
2936                                     &dys_kernel_param);
2937             }
2938         }
2939     }
2940
2941     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2942          vp9_state->alt_ref_obj) {
2943         obj_surface = vp9_state->alt_ref_obj;
2944         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2945
2946         input_surface = obj_surface;
2947         output_surface = vp9_priv_surface->dys_surface_obj;
2948
2949         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2950         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2951         dys_kernel_param.input_surface = input_surface;
2952
2953         dys_kernel_param.output_width = pic_param->frame_width_dst;
2954         dys_kernel_param.output_height = pic_param->frame_height_dst;
2955         dys_kernel_param.output_surface = output_surface;
2956
2957         gen9_vp9_dys_kernel(ctx, encode_state,
2958                             encoder_context,
2959                             &dys_kernel_param);
2960
2961         if (vp9_state->hme_enabled) {
2962             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2963             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2964             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2965
2966             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2967             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2968             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2969
2970             gen9_vp9_dys_kernel(ctx, encode_state,
2971                                 encoder_context,
2972                                 &dys_kernel_param);
2973
2974             /* Does it really need to do the 16x HME if the
2975              * resolution is different?
2976              * Maybe it should be restricted
2977              */
2978             if (vp9_state->b16xme_enabled) {
2979                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2980                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2981                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2982
2983                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2984                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2985                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2986
2987                 gen9_vp9_dys_kernel(ctx, encode_state,
2988                                     encoder_context,
2989                                     &dys_kernel_param);
2990             }
2991         }
2992     }
2993
2994     return VA_STATUS_SUCCESS;
2995 }
2996
2997 static void
2998 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2999                          struct encode_state *encode_state,
3000                          struct i965_gpe_context *gpe_context,
3001                          struct intel_encoder_context *encoder_context,
3002                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
3003 {
3004     struct gen9_vp9_state *vp9_state;
3005     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
3006     vp9_mbenc_curbe_data  *curbe_cmd;
3007     VAEncPictureParameterBufferVP9  *pic_param;
3008     int i, segment_count;
3009     int seg_qindex;
3010     struct object_surface *obj_surface;
3011     struct gen9_surface_vp9 *vp9_priv_surface;
3012
3013     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3014
3015     if (!vp9_state || !vp9_state->pic_param)
3016         return;
3017
3018     pic_param = curbe_param->ppic_param;
3019     seg_param = curbe_param->psegment_param;
3020
3021     if (!seg_param) {
3022         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
3023         seg_param = &tmp_seg_param;
3024     }
3025
3026     curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
3027
3028     if (!curbe_cmd)
3029         return;
3030
3031     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
3032
3033     if (vp9_state->dys_in_use)
3034     {
3035         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
3036         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
3037     }
3038     else
3039     {
3040         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
3041         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
3042     }
3043
3044     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
3045
3046     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
3047     if (pic_param->pic_flags.bits.segmentation_enabled)
3048         segment_count = 8;
3049     else
3050         segment_count = 1;
3051
3052     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
3053
3054     //right now set them to normal settings
3055     if (curbe_param->picture_coding_type)
3056     {
3057         switch (vp9_state->target_usage)
3058         {
3059         case INTEL_ENC_VP9_TU_QUALITY:
3060             curbe_cmd->dw1.min_16for32_check    = 0x00;
3061             curbe_cmd->dw2.multi_pred           = 0x02;
3062             curbe_cmd->dw2.len_sp               = 0x39;
3063             curbe_cmd->dw2.search_x             = 0x30;
3064             curbe_cmd->dw2.search_y             = 0x28;
3065             curbe_cmd->dw3.min_ref_for32_check = 0x01;
3066             curbe_cmd->dw4.skip16_threshold     = 0x000A;
3067             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
3068
3069             memcpy(&curbe_cmd->dw16,
3070                     vp9_diamond_ime_search_path_delta,
3071                     14 * sizeof(unsigned int));
3072             break;
3073         case INTEL_ENC_VP9_TU_PERFORMANCE:
3074             curbe_cmd->dw1.min_16for32_check    = 0x02;
3075             curbe_cmd->dw2.multi_pred           = 0x00;
3076             curbe_cmd->dw2.len_sp               = 0x10;
3077             curbe_cmd->dw2.search_x             = 0x20;
3078             curbe_cmd->dw2.search_y             = 0x20;
3079             curbe_cmd->dw3.min_ref_for32_check = 0x03;
3080             curbe_cmd->dw4.skip16_threshold     = 0x0014;
3081             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
3082
3083             memcpy(&curbe_cmd->dw16,
3084                     vp9_fullspiral_ime_search_path_delta,
3085                     14 * sizeof(unsigned int));
3086
3087             break;
3088         default:  // normal settings
3089             curbe_cmd->dw1.min_16for32_check     = 0x01;
3090             curbe_cmd->dw2.multi_pred           = 0x00;
3091             curbe_cmd->dw2.len_sp               = 0x19;
3092             curbe_cmd->dw2.search_x             = 0x30;
3093             curbe_cmd->dw2.search_y             = 0x28;
3094             curbe_cmd->dw3.min_ref_for32_check = 0x02;
3095             curbe_cmd->dw4.skip16_threshold     = 0x000F;
3096             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
3097
3098             memcpy(&curbe_cmd->dw16,
3099                     vp9_diamond_ime_search_path_delta,
3100                     14 * sizeof(unsigned int));
3101             break;
3102         }
3103
3104         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
3105         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
3106         // co-located predictor must be disabled when dynamic scaling is enabled
3107         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
3108     }
3109
3110     curbe_cmd->dw5.inter_round = 0;
3111     curbe_cmd->dw5.intra_round = 4;
3112     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
3113
3114     for (i = 0; i < segment_count; i++)
3115     {
3116         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
3117                      + seg_param->seg_data[i].segment_qindex_delta;
3118
3119         seg_qindex = CLAMP(0, 255, seg_qindex);
3120
3121         if (curbe_param->picture_coding_type)
3122             memcpy(&curbe_cmd->segments[i],
3123                    &intel_vp9_costlut_p[seg_qindex * 16],
3124                    16 * sizeof(unsigned int));
3125         else
3126             memcpy(&curbe_cmd->segments[i],
3127                    &intel_vp9_costlut_key[seg_qindex * 16],
3128                    16 * sizeof(unsigned int));
3129     }
3130
3131     if (curbe_param->picture_coding_type)
3132     {
3133         if (curbe_cmd->dw3.multi_ref_qp_check)
3134         {
3135             if (curbe_param->ref_frame_flag & 0x01)
3136             {
3137                 obj_surface = curbe_param->last_ref_obj;
3138                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3139                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3140             }
3141
3142             if (curbe_param->ref_frame_flag & 0x02)
3143             {
3144                 obj_surface = curbe_param->golden_ref_obj;
3145                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3146                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3147             }
3148
3149             if (curbe_param->ref_frame_flag & 0x04)
3150             {
3151                 obj_surface = curbe_param->alt_ref_obj;
3152                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3153                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3154             }
3155         }
3156     }
3157     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
3158     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
3159     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
3160     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3161     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
3162     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
3163     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
3164     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
3165     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3166     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
3167     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
3168     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
3169
3170     gen8p_gpe_context_unmap_curbe(gpe_context);
3171     return;
3172 }
3173
3174 static void
3175 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
3176                             struct encode_state *encode_state,
3177                             struct i965_gpe_context *gpe_context,
3178                             struct intel_encoder_context *encoder_context,
3179                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
3180 {
3181     struct gen9_vp9_state *vp9_state;
3182     unsigned int            res_size;
3183     unsigned int            frame_width_in_sb, frame_height_in_sb;
3184     struct object_surface   *obj_surface, *tmp_input;
3185     struct gen9_surface_vp9 *vp9_priv_surface;
3186     int media_function;
3187
3188     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3189
3190     if (!vp9_state || !vp9_state->pic_param)
3191         return;
3192
3193     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3194     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3195     media_function = mbenc_param->media_state_type;
3196
3197     switch (media_function)
3198     {
3199     case VP9_MEDIA_STATE_MBENC_I_32x32:
3200     {
3201         obj_surface = mbenc_param->curr_frame_obj;
3202
3203         gen9_add_2d_gpe_surface(ctx,
3204                                 gpe_context,
3205                                 obj_surface,
3206                                 0,
3207                                 1,
3208                                 I965_SURFACEFORMAT_R8_UNORM,
3209                                 VP9_BTI_MBENC_CURR_Y_G9);
3210
3211         gen9_add_2d_gpe_surface(ctx,
3212                                 gpe_context,
3213                                 obj_surface,
3214                                 1,
3215                                 1,
3216                                 I965_SURFACEFORMAT_R16_UINT,
3217                                 VP9_BTI_MBENC_CURR_UV_G9);
3218
3219
3220         if (mbenc_param->segmentation_enabled)
3221         {
3222            gen9_add_buffer_2d_gpe_surface(ctx,
3223                                    gpe_context,
3224                                    mbenc_param->pres_segmentation_map,
3225                                    1,
3226                                    I965_SURFACEFORMAT_R8_UNORM,
3227                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3228
3229         }
3230
3231         res_size = 16 * mbenc_param->frame_width_in_mb *
3232                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3233         gen9_add_buffer_gpe_surface(ctx,
3234                                     gpe_context,
3235                                     mbenc_param->pres_mode_decision,
3236                                     0,
3237                                     res_size / 4,
3238                                     0,
3239                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3240
3241         break;
3242     }
3243     case VP9_MEDIA_STATE_MBENC_I_16x16:
3244     {
3245         obj_surface = mbenc_param->curr_frame_obj;
3246
3247         gen9_add_2d_gpe_surface(ctx,
3248                                 gpe_context,
3249                                 obj_surface,
3250                                 0,
3251                                 1,
3252                                 I965_SURFACEFORMAT_R8_UNORM,
3253                                 VP9_BTI_MBENC_CURR_Y_G9);
3254
3255         gen9_add_2d_gpe_surface(ctx,
3256                                 gpe_context,
3257                                 obj_surface,
3258                                 1,
3259                                 1,
3260                                 I965_SURFACEFORMAT_R16_UINT,
3261                                 VP9_BTI_MBENC_CURR_UV_G9);
3262
3263         gen9_add_adv_gpe_surface(ctx, gpe_context,
3264                                  obj_surface,
3265                                  VP9_BTI_MBENC_CURR_NV12_G9);
3266
3267         if (mbenc_param->segmentation_enabled)
3268         {
3269            gen9_add_buffer_2d_gpe_surface(ctx,
3270                                    gpe_context,
3271                                    mbenc_param->pres_segmentation_map,
3272                                    1,
3273                                    I965_SURFACEFORMAT_R8_UNORM,
3274                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3275
3276         }
3277
3278         res_size = 16 * mbenc_param->frame_width_in_mb *
3279                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3280         gen9_add_buffer_gpe_surface(ctx,
3281                                     gpe_context,
3282                                     mbenc_param->pres_mode_decision,
3283                                     0,
3284                                     res_size / 4,
3285                                     0,
3286                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3287
3288         res_size = 160;
3289
3290         gen9_add_dri_buffer_gpe_surface(ctx,
3291                                         gpe_context,
3292                                         mbenc_param->gpe_context_tx->dynamic_state.bo,
3293                                         0,
3294                                         ALIGN(res_size, 64),
3295                                         mbenc_param->gpe_context_tx->curbe_offset,
3296                                         VP9_BTI_MBENC_TX_CURBE_G9);
3297
3298         break;
3299     }
3300     case VP9_MEDIA_STATE_MBENC_P:
3301     {
3302         obj_surface = mbenc_param->curr_frame_obj;
3303
3304         gen9_add_2d_gpe_surface(ctx,
3305                                 gpe_context,
3306                                 obj_surface,
3307                                 0,
3308                                 1,
3309                                 I965_SURFACEFORMAT_R8_UNORM,
3310                                 VP9_BTI_MBENC_CURR_Y_G9);
3311
3312         gen9_add_2d_gpe_surface(ctx, gpe_context,
3313                                 obj_surface,
3314                                 1,
3315                                 1,
3316                                 I965_SURFACEFORMAT_R16_UINT,
3317                                 VP9_BTI_MBENC_CURR_UV_G9);
3318
3319         gen9_add_adv_gpe_surface(ctx, gpe_context,
3320                                  obj_surface,
3321                                  VP9_BTI_MBENC_CURR_NV12_G9);
3322
3323         if (mbenc_param->last_ref_obj)
3324         {
3325             obj_surface = mbenc_param->last_ref_obj;
3326             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3327
3328             if (vp9_state->dys_in_use &&
3329                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3330                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3331                 tmp_input = vp9_priv_surface->dys_surface_obj;
3332             else
3333                 tmp_input = obj_surface;
3334
3335             gen9_add_adv_gpe_surface(ctx, gpe_context,
3336                                  tmp_input,
3337                                  VP9_BTI_MBENC_LAST_NV12_G9);
3338
3339             gen9_add_adv_gpe_surface(ctx, gpe_context,
3340                                  tmp_input,
3341                                  VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3342
3343         }
3344
3345         if (mbenc_param->golden_ref_obj)
3346         {
3347             obj_surface = mbenc_param->golden_ref_obj;
3348             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3349
3350             if (vp9_state->dys_in_use &&
3351                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3352                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3353                 tmp_input = vp9_priv_surface->dys_surface_obj;
3354             else
3355                 tmp_input = obj_surface;
3356
3357             gen9_add_adv_gpe_surface(ctx, gpe_context,
3358                                  tmp_input,
3359                                  VP9_BTI_MBENC_GOLD_NV12_G9);
3360
3361             gen9_add_adv_gpe_surface(ctx, gpe_context,
3362                                  tmp_input,
3363                                  VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3364
3365         }
3366
3367         if (mbenc_param->alt_ref_obj)
3368         {
3369             obj_surface = mbenc_param->alt_ref_obj;
3370             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3371
3372             if (vp9_state->dys_in_use &&
3373                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3374                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3375                 tmp_input = vp9_priv_surface->dys_surface_obj;
3376             else
3377                 tmp_input = obj_surface;
3378
3379             gen9_add_adv_gpe_surface(ctx, gpe_context,
3380                                  tmp_input,
3381                                  VP9_BTI_MBENC_ALTREF_NV12_G9);
3382
3383             gen9_add_adv_gpe_surface(ctx, gpe_context,
3384                                  tmp_input,
3385                                  VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3386
3387         }
3388
3389         if (mbenc_param->hme_enabled)
3390         {
3391             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3392                                        mbenc_param->ps4x_memv_data_buffer,
3393                                        1,
3394                                        I965_SURFACEFORMAT_R8_UNORM,
3395                                        VP9_BTI_MBENC_HME_MV_DATA_G9);
3396
3397             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3398                                        mbenc_param->ps4x_memv_distortion_buffer,
3399                                        1,
3400                                        I965_SURFACEFORMAT_R8_UNORM,
3401                                        VP9_BTI_MBENC_HME_DISTORTION_G9);
3402         }
3403
3404         if (mbenc_param->segmentation_enabled)
3405         {
3406            gen9_add_buffer_2d_gpe_surface(ctx,
3407                                    gpe_context,
3408                                    mbenc_param->pres_segmentation_map,
3409                                    1,
3410                                    I965_SURFACEFORMAT_R8_UNORM,
3411                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3412
3413         }
3414
3415         res_size = 16 * mbenc_param->frame_width_in_mb *
3416                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3417         gen9_add_buffer_gpe_surface(ctx,
3418                                     gpe_context,
3419                                     mbenc_param->pres_mode_decision_prev,
3420                                     0,
3421                                     res_size / 4,
3422                                     0,
3423                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3424
3425         gen9_add_buffer_gpe_surface(ctx,
3426                                     gpe_context,
3427                                     mbenc_param->pres_mode_decision,
3428                                     0,
3429                                     res_size / 4,
3430                                     0,
3431                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3432
3433         gen9_add_buffer_2d_gpe_surface(ctx,
3434                                    gpe_context,
3435                                    mbenc_param->pres_output_16x16_inter_modes,
3436                                    1,
3437                                    I965_SURFACEFORMAT_R8_UNORM,
3438                                    VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3439
3440         res_size = 160;
3441
3442         gen9_add_dri_buffer_gpe_surface(ctx,
3443                                         gpe_context,
3444                                         mbenc_param->gpe_context_tx->dynamic_state.bo,
3445                                         0,
3446                                         ALIGN(res_size, 64),
3447                                         mbenc_param->gpe_context_tx->curbe_offset,
3448                                         VP9_BTI_MBENC_TX_CURBE_G9);
3449
3450
3451         break;
3452     }
3453     case VP9_MEDIA_STATE_MBENC_TX:
3454     {
3455         obj_surface = mbenc_param->curr_frame_obj;
3456
3457         gen9_add_2d_gpe_surface(ctx,
3458                                 gpe_context,
3459                                 obj_surface,
3460                                 0,
3461                                 1,
3462                                 I965_SURFACEFORMAT_R8_UNORM,
3463                                 VP9_BTI_MBENC_CURR_Y_G9);
3464
3465         gen9_add_2d_gpe_surface(ctx,
3466                                 gpe_context,
3467                                 obj_surface,
3468                                 1,
3469                                 1,
3470                                 I965_SURFACEFORMAT_R16_UINT,
3471                                 VP9_BTI_MBENC_CURR_UV_G9);
3472
3473         if (mbenc_param->segmentation_enabled)
3474         {
3475            gen9_add_buffer_2d_gpe_surface(ctx,
3476                                    gpe_context,
3477                                    mbenc_param->pres_segmentation_map,
3478                                    1,
3479                                    I965_SURFACEFORMAT_R8_UNORM,
3480                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3481
3482         }
3483
3484         res_size = 16 * mbenc_param->frame_width_in_mb *
3485                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3486         gen9_add_buffer_gpe_surface(ctx,
3487                                     gpe_context,
3488                                     mbenc_param->pres_mode_decision,
3489                                     0,
3490                                     res_size / 4,
3491                                     0,
3492                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3493
3494         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3495         gen9_add_buffer_gpe_surface(ctx,
3496                                     gpe_context,
3497                                     mbenc_param->pres_mb_code_surface,
3498                                     0,
3499                                     res_size / 4,
3500                                     0,
3501                                     VP9_BTI_MBENC_PAK_DATA_G9);
3502
3503         // CU Record
3504         res_size = frame_width_in_sb * frame_height_in_sb *
3505                    64 * 16 * sizeof(unsigned int);
3506
3507         gen9_add_buffer_gpe_surface(ctx,
3508                                     gpe_context,
3509                                     mbenc_param->pres_mb_code_surface,
3510                                     0,
3511                                     res_size / 4,
3512                                     mbenc_param->mb_data_offset,
3513                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3514     }
3515     default:
3516         break;
3517     }
3518
3519     return;
3520 }
3521
3522 static VAStatus
3523 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3524                       struct encode_state *encode_state,
3525                       struct intel_encoder_context *encoder_context,
3526                       int media_function)
3527 {
3528     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3529     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3530     struct gpe_media_object_walker_parameter        media_object_walker_param;
3531     struct vp9_encoder_kernel_walker_parameter      kernel_walker_param;
3532     unsigned int    resolution_x, resolution_y;
3533     struct gen9_vp9_state *vp9_state;
3534     VAEncPictureParameterBufferVP9  *pic_param;
3535     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3536     struct gen9_vp9_mbenc_surface_param             surface_param;
3537     VAStatus    va_status = VA_STATUS_SUCCESS;
3538     int mbenc_gpe_index = 0;
3539     struct object_surface *obj_surface;
3540     struct gen9_surface_vp9 *vp9_priv_surface;
3541
3542     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3543
3544     if (!vp9_state || !vp9_state->pic_param)
3545         return VA_STATUS_ERROR_ENCODING_ERROR;
3546
3547     pic_param = vp9_state->pic_param;
3548
3549     switch (media_function)
3550     {
3551         case VP9_MEDIA_STATE_MBENC_I_32x32:
3552             mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3553             break;
3554
3555         case VP9_MEDIA_STATE_MBENC_I_16x16:
3556             mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3557             break;
3558
3559         case VP9_MEDIA_STATE_MBENC_P:
3560             mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3561             break;
3562
3563         case VP9_MEDIA_STATE_MBENC_TX:
3564             mbenc_gpe_index = VP9_MBENC_IDX_TX;
3565             break;
3566
3567         default:
3568             va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3569             return va_status;
3570     }
3571
3572     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3573     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3574
3575     gen9_gpe_reset_binding_table(ctx, gpe_context);
3576
3577     // Set curbe
3578     if (!vp9_state->mbenc_curbe_set_in_brc_update)
3579     {
3580         if(media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3581            media_function == VP9_MEDIA_STATE_MBENC_P)
3582         {
3583             memset(&curbe_param, 0, sizeof(curbe_param));
3584             curbe_param.ppic_param            = vp9_state->pic_param;
3585             curbe_param.pseq_param            = vp9_state->seq_param;
3586             curbe_param.psegment_param        = vp9_state->segment_param;
3587             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3588             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3589             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3590             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3591             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3592             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3593             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3594             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3595             curbe_param.media_state_type      = media_function;
3596             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3597
3598             vme_context->pfn_set_curbe_mbenc(ctx,
3599                                              encode_state,
3600                                              gpe_context,
3601                                              encoder_context,
3602                                              &curbe_param);
3603         }
3604     }
3605
3606     memset(&surface_param, 0, sizeof(surface_param));
3607     surface_param.media_state_type             = media_function;
3608     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3609     surface_param.frame_width                  = vp9_state->frame_width;
3610     surface_param.frame_height                 = vp9_state->frame_height;
3611     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3612     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3613     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3614     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3615     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3616     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3617     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3618     surface_param.pres_mode_decision           =
3619               &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3620     surface_param.pres_mode_decision_prev      =
3621               &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3622     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3623     surface_param.pres_mbenc_curbe_buffer      = NULL;
3624     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3625     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3626     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3627     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3628     surface_param.gpe_context_tx               = tx_gpe_context;
3629     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3630
3631     obj_surface = encode_state->reconstructed_object;
3632     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3633     if (vp9_state->dys_in_use &&
3634         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3635          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3636         obj_surface = vp9_priv_surface->dys_surface_obj;
3637     } else
3638         obj_surface = encode_state->input_yuv_object;
3639
3640     surface_param.curr_frame_obj             = obj_surface;
3641
3642     vme_context->pfn_send_mbenc_surface(ctx,
3643                                         encode_state,
3644                                         gpe_context,
3645                                         encoder_context,
3646                                         &surface_param);
3647
3648     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3649         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3650         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3651     } else {
3652         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3653         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3654     }
3655
3656     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3657     kernel_walker_param.resolution_x = resolution_x;
3658     kernel_walker_param.resolution_y = resolution_y;
3659
3660     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3661         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3662         kernel_walker_param.use_scoreboard = 1;
3663         kernel_walker_param.no_dependency = 0;
3664         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3665     } else {
3666         kernel_walker_param.use_scoreboard = 0;
3667         kernel_walker_param.no_dependency = 1;
3668     }
3669
3670     gen8_gpe_setup_interface_data(ctx, gpe_context);
3671
3672     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3673
3674     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3675                                         gpe_context,
3676                                         media_function,
3677                                         &media_object_walker_param);
3678     return va_status;
3679 }
3680
3681 static void
3682 gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
3683                           struct vp9_encoder_kernel_parameter *kernel_param)
3684 {
3685     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3686
3687     gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
3688
3689     gpe_context->sampler_size = 0;
3690     if (kernel_param->sampler_size) {
3691         gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
3692     }
3693
3694     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3695     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3696     gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) *
3697                                    NUM_KERNELS_PER_GPE_CONTEXT;
3698
3699     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3700     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3701     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3702     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3703
3704     gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3705     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3706     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3707     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3708                                               gpe_context->vfe_state.curbe_allocation_size -
3709                                               ((gpe_context->idrt.entry_size >> 5) *
3710                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3711     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3712     gpe_context->vfe_state.gpgpu_mode = 0;
3713 }
3714
3715 static void
3716 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3717                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3718 {
3719     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3720     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3721     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3722
3723     if (scoreboard_param->walkpat_flag) {
3724         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3725         gpe_context->vfe_desc5.scoreboard0.type = 1;
3726
3727         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3728         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3729
3730         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3731         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3732
3733         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3734         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3735
3736         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3737         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3738     } else {
3739         // Scoreboard 0
3740         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3741         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3742
3743         // Scoreboard 1
3744         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3745         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3746
3747         // Scoreboard 2
3748         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3749         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3750
3751         // Scoreboard 3
3752         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3753         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3754
3755         // Scoreboard 4
3756         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3757         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3758
3759         // Scoreboard 5
3760         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3761         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3762
3763         // Scoreboard 6
3764         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3765         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3766
3767         // Scoreboard 7
3768         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3769         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3770     }
3771 }
3772
3773 #define VP9_MI_BLOCK_MASK     0x07 /* low three bits; the parameter check below rejects frame widths/heights that have any of them set */
3774 #define VP9_VME_REF_WIN       48 /* NOTE(review): presumably the VME reference window size; no use is visible nearby - confirm */
3775
3776 static VAStatus
3777 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3778                               struct encode_state *encode_state,
3779                               struct intel_encoder_context *encoder_context)
3780 {
3781     struct i965_driver_data *i965 = i965_driver_data(ctx);
3782     struct gen9_vp9_state *vp9_state;
3783     VAEncPictureParameterBufferVP9  *pic_param;
3784     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3785     VAEncSequenceParameterBufferVP9 *seq_param;
3786     struct object_surface *obj_surface;
3787     struct object_buffer *obj_buffer;
3788     struct gen9_surface_vp9 *vp9_priv_surface;
3789
3790     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3791
3792     if (!encode_state->pic_param_ext ||
3793         !encode_state->pic_param_ext->buffer) {
3794         return VA_STATUS_ERROR_INVALID_PARAMETER;
3795     }
3796     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3797
3798     if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
3799         pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
3800         pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
3801         pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
3802         return VA_STATUS_ERROR_INVALID_PARAMETER;
3803
3804     obj_buffer = BUFFER(pic_param->coded_buf);
3805
3806     if (!obj_buffer ||
3807         !obj_buffer->buffer_store ||
3808         !obj_buffer->buffer_store->bo)
3809         return VA_STATUS_ERROR_INVALID_PARAMETER;
3810
3811     encode_state->coded_buf_object = obj_buffer;
3812
3813     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3814
3815     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3816
3817     if (!encode_state->reconstructed_object ||
3818         !encode_state->input_yuv_object)
3819         return VA_STATUS_ERROR_INVALID_PARAMETER;
3820
3821     vp9_state->curr_frame = pic_param->reconstructed_frame;
3822     vp9_state->ref_frame_flag = 0;
3823     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3824         pic_param->pic_flags.bits.intra_only) {
3825         /* this will be regarded as I-frame type */
3826         vp9_state->picture_coding_type = 0;
3827         vp9_state->last_ref_obj = NULL;
3828         vp9_state->golden_ref_obj = NULL;
3829         vp9_state->alt_ref_obj = NULL;
3830     } else {
3831         vp9_state->picture_coding_type = 1;
3832         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3833                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3834
3835         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3836         vp9_state->last_ref_obj = obj_surface;
3837         if (!obj_surface ||
3838             !obj_surface->bo ||
3839             !obj_surface->private_data) {
3840             vp9_state->last_ref_obj = NULL;
3841             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3842         }
3843
3844         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3845         vp9_state->golden_ref_obj = obj_surface;
3846         if (!obj_surface ||
3847             !obj_surface->bo ||
3848             !obj_surface->private_data) {
3849             vp9_state->golden_ref_obj = NULL;
3850             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3851         }
3852
3853         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3854         vp9_state->alt_ref_obj = obj_surface;
3855         if (!obj_surface ||
3856             !obj_surface->bo ||
3857             !obj_surface->private_data) {
3858             vp9_state->alt_ref_obj = NULL;
3859             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3860         }
3861
3862         /* remove the duplicated flag and ref frame list */
3863         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3864             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3865                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3866                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3867                 vp9_state->golden_ref_obj = NULL;
3868             }
3869
3870             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3871                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3872                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3873                 vp9_state->alt_ref_obj = NULL;
3874             }
3875         }
3876
3877         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3878             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3879                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3880                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3881                 vp9_state->alt_ref_obj = NULL;
3882             }
3883         }
3884
3885         if (vp9_state->ref_frame_flag == 0)
3886             return VA_STATUS_ERROR_INVALID_PARAMETER;
3887     }
3888
3889     seg_param = NULL;
3890     if (pic_param->pic_flags.bits.segmentation_enabled) {
3891         if (!encode_state->q_matrix ||
3892             !encode_state->q_matrix->buffer) {
3893             return VA_STATUS_ERROR_INVALID_PARAMETER;
3894         }
3895         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3896                            encode_state->q_matrix->buffer;
3897     }
3898
3899     seq_param = NULL;
3900     if (encode_state->seq_param_ext &&
3901         encode_state->seq_param_ext->buffer)
3902         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3903
3904     if (!seq_param) {
3905         seq_param = &vp9_state->bogus_seq_param;
3906     }
3907
3908     vp9_state->pic_param = pic_param;
3909     vp9_state->segment_param = seg_param;
3910     vp9_state->seq_param = seq_param;
3911
3912     obj_surface = encode_state->reconstructed_object;
3913     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3914         pic_param->frame_height_dst > obj_surface->orig_height)
3915         return VA_STATUS_ERROR_INVALID_SURFACE;
3916
3917     if (!vp9_state->dys_enabled &&
3918          ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3919           (pic_param->frame_height_src != pic_param->frame_height_dst)))
3920         return VA_STATUS_ERROR_UNIMPLEMENTED;
3921
3922     if (vp9_state->brc_enabled) {
3923         if (vp9_state->brc_flag_check & VP9_BRC_FAILURE) {
3924             WARN_ONCE("Rate control misc_parameter is required for BRC\n");
3925             return VA_STATUS_ERROR_INVALID_PARAMETER;
3926         }
3927
3928         if (vp9_state->first_frame) {
3929             unsigned int brc_flag;
3930             VAEncMiscParameterBuffer *misc_param;
3931
3932             brc_flag = VP9_BRC_SEQ | VP9_BRC_RC;
3933             if ((vp9_state->brc_flag_check & brc_flag) != brc_flag) {
3934                 WARN_ONCE("SPS/RC misc is required for BRC\n");
3935                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3936             }
3937
3938             /* check the corresponding BRC parameter for CBR and VBR */
3939             if (encoder_context->rate_control_mode == VA_RC_CBR) {
3940                 vp9_state->target_bit_rate = seq_param->bits_per_second;
3941                 vp9_state->gop_size = seq_param->intra_period;
3942
3943                 if (vp9_state->brc_flag_check & VP9_BRC_HRD) {
3944                     VAEncMiscParameterHRD *misc_param_hrd;
3945
3946                     misc_param = (VAEncMiscParameterBuffer *)
3947                         encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
3948                     misc_param_hrd = (VAEncMiscParameterHRD *)misc_param->data;
3949
3950                     vp9_state->init_vbv_buffer_fullness_in_bit = misc_param_hrd->initial_buffer_fullness;
3951                     vp9_state->vbv_buffer_size_in_bit = misc_param_hrd->buffer_size;
3952                 }
3953
3954                 if (vp9_state->brc_flag_check & VP9_BRC_FR) {
3955                     VAEncMiscParameterFrameRate *misc_param_fr;
3956
3957                     misc_param = (VAEncMiscParameterBuffer *)
3958                         encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
3959                     misc_param_fr = (VAEncMiscParameterFrameRate *)misc_param->data;
3960
3961                     vp9_state->frame_rate = misc_param_fr->framerate;
3962                 } else {
3963                     /* Assign the default frame rate */
3964                     vp9_state->frame_rate = 30;
3965                 }
3966
3967                 /* RC misc will override HRD parameter */
3968                 if (vp9_state->brc_flag_check & VP9_BRC_RC) {
3969                     VAEncMiscParameterRateControl *misc_param_rc;
3970
3971                     misc_param = (VAEncMiscParameterBuffer *)
3972                         encode_state->misc_param[VAEncMiscParameterTypeRateControl]->buffer;
3973                     misc_param_rc = (VAEncMiscParameterRateControl *)misc_param->data;
3974
3975                     vp9_state->target_bit_rate = misc_param_rc->bits_per_second;
3976                     vp9_state->vbv_buffer_size_in_bit = (misc_param_rc->bits_per_second / 1000) *
3977                                                  misc_param_rc->window_size;
3978                     vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3979                     vp9_state->window_size = misc_param_rc->window_size;
3980                 }
3981                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3982                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3983             } else {
3984                 /* VBR mode */
3985                 brc_flag = VP9_BRC_SEQ | VP9_BRC_RC;
3986                 vp9_state->target_bit_rate = seq_param->bits_per_second;
3987                 vp9_state->gop_size = seq_param->intra_period;
3988
3989                 if (vp9_state->brc_flag_check & VP9_BRC_FR) {
3990                     VAEncMiscParameterFrameRate *misc_param_fr;
3991
3992                     misc_param = (VAEncMiscParameterBuffer *)
3993                         encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
3994                     misc_param_fr = (VAEncMiscParameterFrameRate *)misc_param->data;
3995
3996                     vp9_state->frame_rate = misc_param_fr->framerate;
3997                 } else {
3998                     /* Assign the default frame rate */
3999                     vp9_state->frame_rate = 30;
4000                 }
4001
4002                 if (vp9_state->brc_flag_check & VP9_BRC_RC) {
4003                     VAEncMiscParameterRateControl *misc_param_rc;
4004
4005                     misc_param = (VAEncMiscParameterBuffer *)
4006                         encode_state->misc_param[VAEncMiscParameterTypeRateControl]->buffer;
4007                     misc_param_rc = (VAEncMiscParameterRateControl *)misc_param->data;
4008
4009                     vp9_state->max_bit_rate = misc_param_rc->bits_per_second;
4010                     vp9_state->vbv_buffer_size_in_bit = (misc_param_rc->bits_per_second / 1000) *
4011                                                  misc_param_rc->window_size;
4012                     vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
4013                     vp9_state->target_bit_rate = (misc_param_rc->bits_per_second / 100) *
4014                                 misc_param_rc->target_percentage;
4015                     vp9_state->min_bit_rate = (misc_param_rc->bits_per_second / 100) *
4016                          (2 * misc_param_rc->target_percentage - 100);
4017                     vp9_state->target_percentage = misc_param_rc->target_percentage;
4018                     vp9_state->window_size = misc_param_rc->window_size;
4019                 }
4020             }
4021         }
4022         else if (vp9_state->picture_coding_type == KEY_FRAME){
4023             VAEncMiscParameterBuffer *misc_param;
4024             /* update the BRC parameter only when it is key-frame */
4025             /* If the parameter related with RC is changed. Reset BRC */
4026             if (vp9_state->brc_flag_check & VP9_BRC_FR) {
4027                VAEncMiscParameterFrameRate *misc_param_fr;
4028
4029                misc_param = (VAEncMiscParameterBuffer *)
4030                    encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
4031                misc_param_fr = (VAEncMiscParameterFrameRate *)misc_param->data;
4032
4033                if (vp9_state->frame_rate != misc_param_fr->framerate) {
4034                    vp9_state->brc_reset = 1;
4035                    vp9_state->frame_rate = misc_param_fr->framerate;
4036                }
4037             }
4038
4039             /* check the GOP size. And bit_per_second in SPS is ignored */
4040             if (vp9_state->brc_flag_check & VP9_BRC_SEQ) {
4041                 if (vp9_state->gop_size != seq_param->intra_period) {
4042                     vp9_state->brc_reset = 1;
4043                     vp9_state->gop_size = seq_param->intra_period;
4044                 }
4045             }
4046
4047             /* update the bit_per_second */
4048             if (vp9_state->brc_flag_check & VP9_BRC_RC) {
4049                 VAEncMiscParameterRateControl *misc_param_rc;
4050
4051                 misc_param = (VAEncMiscParameterBuffer *)
4052                     encode_state->misc_param[VAEncMiscParameterTypeRateControl]->buffer;
4053                 misc_param_rc = (VAEncMiscParameterRateControl *)misc_param->data;
4054
4055                 if (encoder_context->rate_control_mode == VA_RC_CBR) {
4056                     if (vp9_state->target_bit_rate != misc_param_rc->bits_per_second ||
4057                         vp9_state->window_size != misc_param_rc->window_size) {
4058                         vp9_state->target_bit_rate = misc_param_rc->bits_per_second;
4059                         vp9_state->vbv_buffer_size_in_bit = (misc_param_rc->bits_per_second / 1000) *
4060                                                  misc_param_rc->window_size;
4061                         vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit * 2;
4062                         vp9_state->window_size = misc_param_rc->window_size;
4063                         vp9_state->max_bit_rate = vp9_state->target_bit_rate;
4064                         vp9_state->min_bit_rate = vp9_state->target_bit_rate;
4065                         vp9_state->brc_reset = 1;
4066                     }
4067                 } else {
4068                     /* VBR mode */
4069                     if (vp9_state->max_bit_rate != misc_param_rc->bits_per_second ||
4070                         vp9_state->target_percentage != misc_param_rc->target_percentage) {
4071
4072                         vp9_state->target_bit_rate = (misc_param_rc->bits_per_second / 100) *
4073                                 misc_param_rc->target_percentage;
4074                         vp9_state->min_bit_rate = (misc_param_rc->bits_per_second / 100) *
4075                              (2 * misc_param_rc->target_percentage - 100);
4076                         vp9_state->max_bit_rate = misc_param_rc->bits_per_second;
4077                         vp9_state->vbv_buffer_size_in_bit = (misc_param_rc->bits_per_second / 1000) *
4078                                                  misc_param_rc->window_size;
4079                         vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
4080                         vp9_state->target_percentage = misc_param_rc->target_percentage;
4081                         vp9_state->window_size = misc_param_rc->window_size;
4082                         vp9_state->brc_reset = 1;
4083                     }
4084                 }
4085             }
4086         }
4087     }
4088
4089     vp9_state->frame_width = pic_param->frame_width_dst;
4090     vp9_state->frame_height = pic_param->frame_height_dst;
4091
4092     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
4093     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
4094
4095     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
4096     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
4097
4098     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
4099     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
4100
4101     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
4102     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
4103     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
4104     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
4105
4106     vp9_state->dys_in_use = 0;
4107     if(pic_param->frame_width_src != pic_param->frame_width_dst ||
4108        pic_param->frame_height_src != pic_param->frame_height_dst)
4109         vp9_state->dys_in_use = 1;
4110     vp9_state->dys_ref_frame_flag = 0;
4111     /* check the dys setting. The dys is supported by default. */
4112     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
4113         !pic_param->pic_flags.bits.intra_only) {
4114         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
4115
4116         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
4117              vp9_state->last_ref_obj) {
4118             obj_surface = vp9_state->last_ref_obj;
4119             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
4120
4121             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
4122                 vp9_state->frame_height == vp9_priv_surface->frame_height)
4123                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
4124         }
4125         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
4126              vp9_state->golden_ref_obj) {
4127             obj_surface = vp9_state->golden_ref_obj;
4128             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
4129
4130             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
4131                 vp9_state->frame_height == vp9_priv_surface->frame_height)
4132                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
4133         }
4134         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
4135              vp9_state->alt_ref_obj) {
4136             obj_surface = vp9_state->alt_ref_obj;
4137             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
4138
4139             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
4140                 vp9_state->frame_height == vp9_priv_surface->frame_height)
4141                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
4142         }
4143         if (vp9_state->dys_ref_frame_flag)
4144             vp9_state->dys_in_use = 1;
4145     }
4146
4147     if (vp9_state->hme_supported) {
4148         vp9_state->hme_enabled = 1;
4149     } else {
4150         vp9_state->hme_enabled = 0;
4151     }
4152
4153     if (vp9_state->b16xme_supported) {
4154         vp9_state->b16xme_enabled = 1;
4155     } else {
4156         vp9_state->b16xme_enabled = 0;
4157     }
4158
4159     /* disable HME/16xME if the size is too small */
4160     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
4161         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
4162         vp9_state->hme_enabled = 0;
4163         vp9_state->b16xme_enabled = 0;
4164     }
4165
4166     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
4167         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
4168         vp9_state->b16xme_enabled = 0;
4169
4170     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
4171         pic_param->pic_flags.bits.intra_only) {
4172         vp9_state->hme_enabled = 0;
4173         vp9_state->b16xme_enabled = 0;
4174     }
4175
4176     vp9_state->mbenc_keyframe_dist_enabled = 0;
4177     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
4178         vp9_state->brc_distortion_buffer_supported)
4179         vp9_state->mbenc_keyframe_dist_enabled = 1;
4180
4181     return VA_STATUS_SUCCESS;
4182 }
4183
4184 static VAStatus
4185 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
4186                               struct encode_state *encode_state,
4187                               struct intel_encoder_context *encoder_context)
4188 {
4189     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4190     struct vp9_surface_param surface_param;
4191     struct gen9_vp9_state *vp9_state;
4192     VAEncPictureParameterBufferVP9  *pic_param;
4193     struct object_surface *obj_surface;
4194     struct gen9_surface_vp9 *vp9_surface;
4195     int driver_header_flag = 0;
4196     VAStatus va_status;
4197
4198     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4199
4200     if (!vp9_state || !vp9_state->pic_param)
4201         return VA_STATUS_ERROR_INVALID_PARAMETER;
4202
4203     pic_param = vp9_state->pic_param;
4204
4205     /* this is to check whether the driver should generate the uncompressed header */
4206     driver_header_flag = 1;
4207     if (encode_state->packed_header_data_ext &&
4208         encode_state->packed_header_data_ext[0] &&
4209         pic_param->bit_offset_first_partition_size) {
4210         VAEncPackedHeaderParameterBuffer *param = NULL;
4211
4212         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
4213
4214         if (param->type == VAEncPackedHeaderRawData) {
4215             char *header_data;
4216             unsigned int length_in_bits;
4217
4218             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
4219             length_in_bits = param->bit_length;
4220             driver_header_flag = 0;
4221
4222             vp9_state->frame_header.bit_offset_first_partition_size =
4223                           pic_param->bit_offset_first_partition_size;
4224             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
4225             vp9_state->alias_insert_data = header_data;
4226
4227             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
4228             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
4229             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
4230             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
4231             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
4232             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
4233         }
4234     }
4235
4236     if (driver_header_flag) {
4237         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
4238         intel_write_uncompressed_header(encode_state,
4239                                         VAProfileVP9Profile0,
4240                                         vme_context->frame_header_data,
4241                                         &vp9_state->header_length,
4242                                         &vp9_state->frame_header);
4243         vp9_state->alias_insert_data = vme_context->frame_header_data;
4244     }
4245
4246     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
4247                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4248     if (va_status != VA_STATUS_SUCCESS)
4249         return va_status;
4250
4251     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
4252                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4253
4254     if (va_status != VA_STATUS_SUCCESS)
4255         return va_status;
4256
4257     surface_param.frame_width = vp9_state->frame_width;
4258     surface_param.frame_height = vp9_state->frame_height;
4259     va_status = gen9_vp9_init_check_surfaces(ctx,
4260                                              encode_state->reconstructed_object,
4261                                              &surface_param);
4262
4263     {
4264         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
4265
4266         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
4267     }
4268     if (vp9_state->dys_in_use &&
4269         (pic_param->frame_width_src != pic_param->frame_width_dst ||
4270          pic_param->frame_height_src != pic_param->frame_height_dst)) {
4271         surface_param.frame_width = pic_param->frame_width_dst;
4272         surface_param.frame_height = pic_param->frame_height_dst;
4273         va_status = gen9_vp9_check_dys_surfaces(ctx,
4274                                     encode_state->reconstructed_object,
4275                                     &surface_param);
4276
4277         if (va_status)
4278             return va_status;
4279     }
4280
4281     if (vp9_state->dys_ref_frame_flag) {
4282         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
4283              vp9_state->last_ref_obj) {
4284             obj_surface = vp9_state->last_ref_obj;
4285             surface_param.frame_width = vp9_state->frame_width;
4286             surface_param.frame_height = vp9_state->frame_height;
4287             va_status = gen9_vp9_check_dys_surfaces(ctx,
4288                                     obj_surface,
4289                                     &surface_param);
4290
4291             if (va_status)
4292                 return va_status;
4293         }
4294         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
4295              vp9_state->golden_ref_obj) {
4296             obj_surface = vp9_state->golden_ref_obj;
4297             surface_param.frame_width = vp9_state->frame_width;
4298             surface_param.frame_height = vp9_state->frame_height;
4299             va_status = gen9_vp9_check_dys_surfaces(ctx,
4300                                     obj_surface,
4301                                     &surface_param);
4302
4303             if (va_status)
4304                 return va_status;
4305         }
4306         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
4307              vp9_state->alt_ref_obj) {
4308             obj_surface = vp9_state->alt_ref_obj;
4309             surface_param.frame_width = vp9_state->frame_width;
4310             surface_param.frame_height = vp9_state->frame_height;
4311             va_status = gen9_vp9_check_dys_surfaces(ctx,
4312                                     obj_surface,
4313                                     &surface_param);
4314
4315             if (va_status)
4316                 return va_status;
4317         }
4318     }
4319
4320     if (va_status != VA_STATUS_SUCCESS)
4321         return va_status;
4322     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4323
4324     return VA_STATUS_SUCCESS;
4325 }
4326
4327 static VAStatus
4328 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4329                               struct encode_state *encode_state,
4330                               struct intel_encoder_context *encoder_context)
4331 {
4332     struct i965_driver_data *i965 = i965_driver_data(ctx);
4333     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4334     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4335     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4336     struct gpe_dynamic_state_parameter ds_param;
4337     int i;
4338
4339     /*
4340      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4341      * MBEnc first
4342      */
4343     for (i = 0; i < NUM_VP9_MBENC; i++) {
4344         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4345     }
4346
4347     /*
4348      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4349      * curbe_buffer.
4350      */
4351     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4352            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4353     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4354                                                "mbenc_dys",
4355                                                ds_param.bo_size,
4356                                                0x1000);
4357     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4358
4359     ds_param.bo = mbenc_context->mbenc_bo_dys;
4360     ds_param.curbe_offset = 0;
4361     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4362     for (i = 0; i < NUM_VP9_MBENC; i++) {
4363         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4364                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4365
4366         gen8_gpe_context_set_dynamic_buffer(ctx,
4367                                             &mbenc_context->gpe_contexts[i],
4368                                             &ds_param);
4369     }
4370
4371     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4372     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4373
4374     return VA_STATUS_SUCCESS;
4375 }
4376
4377 static VAStatus
4378 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4379                               struct encode_state *encode_state,
4380                               struct intel_encoder_context *encoder_context)
4381 {
4382     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4383     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4384
4385     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4386     mbenc_context->mbenc_bo_dys = NULL;
4387
4388     return VA_STATUS_SUCCESS;
4389 }
4390
4391 static VAStatus
4392 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4393                               struct encode_state *encode_state,
4394                               struct intel_encoder_context *encoder_context)
4395 {
4396     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4397     struct gen9_vp9_state *vp9_state;
4398     int i;
4399
4400     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4401
4402     if (!vp9_state || !vp9_state->pic_param)
4403         return VA_STATUS_ERROR_INVALID_PARAMETER;
4404
4405     if (vp9_state->dys_in_use) {
4406         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4407     }
4408
4409     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4410         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4411     }
4412
4413     if (vp9_state->picture_coding_type == KEY_FRAME) {
4414         for (i = 0; i < 2; i++)
4415             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4416     }
4417
4418     if (vp9_state->hme_supported) {
4419         gen9_vp9_scaling_kernel(ctx, encode_state,
4420                                 encoder_context,
4421                                 0);
4422         if (vp9_state->b16xme_supported) {
4423             gen9_vp9_scaling_kernel(ctx, encode_state,
4424                                     encoder_context,
4425                                     1);
4426         }
4427     }
4428
4429     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4430         if (vp9_state->b16xme_enabled)
4431             gen9_vp9_me_kernel(ctx, encode_state,
4432                                encoder_context,
4433                                1);
4434
4435         gen9_vp9_me_kernel(ctx, encode_state,
4436                            encoder_context,
4437                            0);
4438     }
4439
4440     if (vp9_state->brc_enabled) {
4441         if (vp9_state->mbenc_keyframe_dist_enabled)
4442             gen9_vp9_brc_intra_dist_kernel(ctx,
4443                                            encode_state,
4444                                            encoder_context);
4445
4446         gen9_vp9_brc_update_kernel(ctx, encode_state,
4447                                    encoder_context);
4448     }
4449
4450     if (vp9_state->picture_coding_type == KEY_FRAME) {
4451         gen9_vp9_mbenc_kernel(ctx, encode_state,
4452                               encoder_context,
4453                               VP9_MEDIA_STATE_MBENC_I_32x32);
4454         gen9_vp9_mbenc_kernel(ctx, encode_state,
4455                               encoder_context,
4456                               VP9_MEDIA_STATE_MBENC_I_16x16);
4457     } else {
4458         gen9_vp9_mbenc_kernel(ctx, encode_state,
4459                               encoder_context,
4460                               VP9_MEDIA_STATE_MBENC_P);
4461     }
4462
4463     gen9_vp9_mbenc_kernel(ctx, encode_state,
4464                           encoder_context,
4465                           VP9_MEDIA_STATE_MBENC_TX);
4466
4467     vp9_state->curr_mode_decision_index ^= 1;
4468     if (vp9_state->brc_enabled) {
4469         vp9_state->brc_inited = 1;
4470         vp9_state->brc_reset = 0;
4471     }
4472
4473     return VA_STATUS_SUCCESS;
4474 }
4475
4476 static VAStatus
4477 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4478                        VAProfile profile,
4479                        struct encode_state *encode_state,
4480                        struct intel_encoder_context *encoder_context)
4481 {
4482     VAStatus va_status;
4483     struct gen9_vp9_state *vp9_state;
4484
4485     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4486
4487     if (!vp9_state)
4488         return VA_STATUS_ERROR_INVALID_CONTEXT;
4489
4490     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4491     if (va_status != VA_STATUS_SUCCESS)
4492         return va_status;
4493
4494     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4495                             encoder_context,
4496                             !vp9_state->brc_allocated);
4497
4498     if (va_status != VA_STATUS_SUCCESS)
4499         return va_status;
4500     vp9_state->brc_allocated = 1;
4501
4502     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4503
4504     if (va_status != VA_STATUS_SUCCESS)
4505         return va_status;
4506
4507     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4508     if (va_status != VA_STATUS_SUCCESS)
4509         return va_status;
4510
4511     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4512     if (va_status != VA_STATUS_SUCCESS)
4513         return va_status;
4514
4515     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4516
4517     return VA_STATUS_SUCCESS;
4518 }
4519
4520 static void
4521 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4522 {
4523     int i;
4524
4525     for (i = 0; i < NUM_VP9_BRC; i++)
4526         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4527 }
4528
4529 static void
4530 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4531 {
4532     int i;
4533
4534     for (i = 0; i < NUM_VP9_SCALING; i++)
4535         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4536 }
4537
4538 static void
4539 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4540 {
4541     gen8_gpe_context_destroy(&me_context->gpe_context);
4542 }
4543
4544 static void
4545 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4546 {
4547     int i;
4548
4549     for (i = 0; i < NUM_VP9_MBENC; i++)
4550         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4551     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4552     mbenc_context->mbenc_bo_size = 0;
4553 }
4554
4555 static void
4556 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4557 {
4558     gen8_gpe_context_destroy(&dys_context->gpe_context);
4559 }
4560
4561 static void
4562 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4563 {
4564     gen9_vp9_free_resources(vme_context);
4565     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4566     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4567     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4568     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4569     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4570
4571     return;
4572 }
4573
/*
 * Destroy callback of the VME context: releases everything the context
 * owns and then the context itself. A NULL context is ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vp9_context = context;

    if (vp9_context != NULL) {
        gen9_vme_kernel_context_destroy_vp9(vp9_context);
        free(vp9_context);
    }
}
4588
4589 static void
4590 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4591                                    struct gen9_encoder_context_vp9 *vme_context,
4592                                    struct vp9_scaling_context *scaling_context)
4593 {
4594     struct i965_gpe_context *gpe_context = NULL;
4595     struct vp9_encoder_kernel_parameter kernel_param;
4596     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4597     struct i965_kernel scale_kernel;
4598
4599     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4600     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4601     kernel_param.sampler_size = 0;
4602
4603     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4604     scoreboard_param.mask = 0xFF;
4605     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4606     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4607     scoreboard_param.walkpat_flag = 0;
4608
4609     gpe_context = &scaling_context->gpe_contexts[0];
4610     gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4611     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4612
4613     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4614     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4615     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4616                            VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4617
4618     memset(&scale_kernel, 0, sizeof(scale_kernel));
4619
4620     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4621                                          sizeof(media_vp9_kernels),
4622                                          INTEL_VP9_ENC_SCALING4X,
4623                                          0,
4624                                          &scale_kernel);
4625
4626     gen8_gpe_load_kernels(ctx,
4627                           gpe_context,
4628                           &scale_kernel,
4629                           1);
4630
4631     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4632     kernel_param.inline_data_size = 0;
4633     kernel_param.sampler_size = 0;
4634
4635     gpe_context = &scaling_context->gpe_contexts[1];
4636     gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4637     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4638
4639     memset(&scale_kernel, 0, sizeof(scale_kernel));
4640
4641     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4642                                          sizeof(media_vp9_kernels),
4643                                          INTEL_VP9_ENC_SCALING2X,
4644                                          0,
4645                                          &scale_kernel);
4646
4647     gen8_gpe_load_kernels(ctx,
4648                           gpe_context,
4649                           &scale_kernel,
4650                           1);
4651
4652     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4653     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4654     return;
4655 }
4656
4657 static void
4658 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4659                              struct gen9_encoder_context_vp9 *vme_context,
4660                              struct vp9_me_context *me_context)
4661 {
4662     struct i965_gpe_context *gpe_context = NULL;
4663     struct vp9_encoder_kernel_parameter kernel_param;
4664     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4665     struct i965_kernel scale_kernel;
4666
4667     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4668     kernel_param.inline_data_size = 0;
4669     kernel_param.sampler_size = 0;
4670
4671     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4672     scoreboard_param.mask = 0xFF;
4673     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4674     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4675     scoreboard_param.walkpat_flag = 0;
4676
4677     gpe_context = &me_context->gpe_context;
4678     gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4679     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4680
4681     memset(&scale_kernel, 0, sizeof(scale_kernel));
4682
4683     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4684                                          sizeof(media_vp9_kernels),
4685                                          INTEL_VP9_ENC_ME,
4686                                          0,
4687                                          &scale_kernel);
4688
4689     gen8_gpe_load_kernels(ctx,
4690                           gpe_context,
4691                           &scale_kernel,
4692                           1);
4693
4694     return;
4695 }
4696
4697 static void
4698 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4699                                  struct gen9_encoder_context_vp9 *vme_context,
4700                                  struct vp9_mbenc_context *mbenc_context)
4701 {
4702     struct i965_gpe_context *gpe_context = NULL;
4703     struct vp9_encoder_kernel_parameter kernel_param;
4704     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4705     int i;
4706     struct i965_kernel scale_kernel;
4707
4708     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4709     kernel_param.inline_data_size = 0;
4710     kernel_param.sampler_size = 0;
4711
4712     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4713     scoreboard_param.mask = 0xFF;
4714     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4715     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4716
4717     for (i = 0; i < NUM_VP9_MBENC; i++) {
4718         gpe_context = &mbenc_context->gpe_contexts[i];
4719
4720         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4721             (i == VP9_MBENC_IDX_INTER)) {
4722             scoreboard_param.walkpat_flag = 1;
4723         } else
4724             scoreboard_param.walkpat_flag = 0;
4725
4726         gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4727         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4728
4729         memset(&scale_kernel, 0, sizeof(scale_kernel));
4730
4731         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4732                                          sizeof(media_vp9_kernels),
4733                                          INTEL_VP9_ENC_MBENC,
4734                                          i,
4735                                          &scale_kernel);
4736
4737         gen8_gpe_load_kernels(ctx,
4738                               gpe_context,
4739                               &scale_kernel,
4740                               1);
4741     }
4742 }
4743
4744 static void
4745 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4746                               struct gen9_encoder_context_vp9 *vme_context,
4747                               struct vp9_brc_context *brc_context)
4748 {
4749     struct i965_gpe_context *gpe_context = NULL;
4750     struct vp9_encoder_kernel_parameter kernel_param;
4751     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4752     int i;
4753     struct i965_kernel scale_kernel;
4754
4755     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4756     kernel_param.inline_data_size = 0;
4757     kernel_param.sampler_size = 0;
4758
4759     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4760     scoreboard_param.mask = 0xFF;
4761     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4762     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4763
4764     for (i = 0; i < NUM_VP9_BRC; i++) {
4765         gpe_context = &brc_context->gpe_contexts[i];
4766         gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4767         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4768
4769         memset(&scale_kernel, 0, sizeof(scale_kernel));
4770
4771         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4772                                          sizeof(media_vp9_kernels),
4773                                          INTEL_VP9_ENC_BRC,
4774                                          i,
4775                                          &scale_kernel);
4776
4777         gen8_gpe_load_kernels(ctx,
4778                               gpe_context,
4779                               &scale_kernel,
4780                               1);
4781     }
4782 }
4783
4784 static void
4785 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4786                                struct gen9_encoder_context_vp9 *vme_context,
4787                                struct vp9_dys_context *dys_context)
4788 {
4789     struct i965_gpe_context *gpe_context = NULL;
4790     struct vp9_encoder_kernel_parameter kernel_param;
4791     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4792     struct i965_kernel scale_kernel;
4793
4794     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4795     kernel_param.inline_data_size = 0;
4796     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4797
4798     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4799     scoreboard_param.mask = 0xFF;
4800     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4801     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4802     scoreboard_param.walkpat_flag = 0;
4803
4804     gpe_context = &dys_context->gpe_context;
4805     gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
4806     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4807
4808     memset(&scale_kernel, 0, sizeof(scale_kernel));
4809
4810     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4811                                          sizeof(media_vp9_kernels),
4812                                          INTEL_VP9_ENC_DYS,
4813                                          0,
4814                                          &scale_kernel);
4815
4816     gen8_gpe_load_kernels(ctx,
4817                           gpe_context,
4818                           &scale_kernel,
4819                           1);
4820
4821     return;
4822 }
4823
4824 static Bool
4825 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4826                                    struct intel_encoder_context *encoder_context,
4827                                    struct gen9_encoder_context_vp9 *vme_context)
4828 {
4829     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4830     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4831     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4832     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4833     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4834
4835     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4836     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4837     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4838     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4839
4840     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4841
4842     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4843     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4844     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4845     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4846     return true;
4847 }
4848
4849 static
4850 void gen9_vp9_write_compressed_element(char *buffer,
4851                                        int index,
4852                                        int prob,
4853                                        bool value)
4854 {
4855     struct vp9_compressed_element *base_element, *vp9_element;
4856     base_element = (struct vp9_compressed_element *)buffer;
4857
4858     vp9_element = base_element + (index >> 1);
4859     if (index % 2) {
4860         vp9_element->b_valid = 1;
4861         vp9_element->b_probdiff_select = 1;
4862         vp9_element->b_prob_select = (prob == 252) ? 1: 0;
4863         vp9_element->b_bin = value;
4864     } else {
4865         vp9_element->a_valid = 1;
4866         vp9_element->a_probdiff_select = 1;
4867         vp9_element->a_prob_select = (prob == 252) ? 1: 0;
4868         vp9_element->a_bin = value;
4869     }
4870 }
4871
4872 static void
4873 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4874                                             struct intel_encoder_context *encoder_context)
4875 {
4876     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4877     VAEncPictureParameterBufferVP9 *pic_param;
4878     struct gen9_vp9_state *vp9_state;
4879     char *buffer;
4880     int i;
4881
4882     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4883
4884     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4885         return;
4886
4887     pic_param = vp9_state->pic_param;
4888     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4889         (pic_param->pic_flags.bits.intra_only) ||
4890          pic_param->pic_flags.bits.error_resilient_mode) {
4891         /* reset current frame_context */
4892         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4893         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4894             pic_param->pic_flags.bits.error_resilient_mode ||
4895             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4896             for (i = 0; i < 4; i++)
4897                 memcpy(&vp9_state->vp9_frame_ctx[i],
4898                        &vp9_state->vp9_current_fc,
4899                        sizeof(FRAME_CONTEXT));
4900         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4901             i = pic_param->pic_flags.bits.frame_context_idx;
4902             memcpy(&vp9_state->vp9_frame_ctx[i],
4903                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4904         }
4905         /* reset the frame_ctx_idx = 0 */
4906         vp9_state->frame_ctx_idx = 0;
4907     } else {
4908         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4909     }
4910
4911     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4912     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4913
4914     if (!buffer)
4915         return;
4916
4917     /* write tx_size */
4918     if ((pic_param->luma_ac_qindex == 0) &&
4919         (pic_param->luma_dc_qindex_delta == 0) &&
4920         (pic_param->chroma_ac_qindex_delta == 0) &&
4921         (pic_param->chroma_dc_qindex_delta == 0)) {
4922         /* lossless flag */
4923         /* nothing is needed */
4924             gen9_vp9_write_compressed_element(buffer,
4925                                           0, 128, 0);
4926             gen9_vp9_write_compressed_element(buffer,
4927                                           1, 128, 0);
4928             gen9_vp9_write_compressed_element(buffer,
4929                                           2, 128, 0);
4930     } else {
4931         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4932             gen9_vp9_write_compressed_element(buffer,
4933                                           0, 128, 1);
4934             gen9_vp9_write_compressed_element(buffer,
4935                                           1, 128, 1);
4936             gen9_vp9_write_compressed_element(buffer,
4937                                           2, 128, 1);
4938         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4939             gen9_vp9_write_compressed_element(buffer,
4940                                           0, 128, 1);
4941             gen9_vp9_write_compressed_element(buffer,
4942                                           1, 128, 1);
4943             gen9_vp9_write_compressed_element(buffer,
4944                                           2, 128, 0);
4945         } else {
4946             unsigned int tx_mode;
4947
4948             tx_mode = vp9_state->tx_mode;
4949             gen9_vp9_write_compressed_element(buffer,
4950                                           0, 128, ((tx_mode) & 2));
4951             gen9_vp9_write_compressed_element(buffer,
4952                                           1, 128, ((tx_mode) & 1));
4953             gen9_vp9_write_compressed_element(buffer,
4954                                           2, 128, 0);
4955         }
4956
4957         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4958
4959             gen9_vp9_write_compressed_element(buffer,
4960                                           3, 128, 0);
4961
4962             gen9_vp9_write_compressed_element(buffer,
4963                                           7, 128, 0);
4964
4965             gen9_vp9_write_compressed_element(buffer,
4966                                           15, 128, 0);
4967         }
4968     }
4969      /*Setup all the input&output object*/
4970
4971     {
4972         /* update the coeff_update flag */
4973         gen9_vp9_write_compressed_element(buffer,
4974                                       27, 128, 0);
4975         gen9_vp9_write_compressed_element(buffer,
4976                                       820, 128, 0);
4977         gen9_vp9_write_compressed_element(buffer,
4978                                       1613, 128, 0);
4979         gen9_vp9_write_compressed_element(buffer,
4980                                       2406, 128, 0);
4981     }
4982
4983
4984     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only)
4985     {
4986         bool allow_comp = !(
4987             (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4988             (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4989             );
4990
4991         if (allow_comp)
4992         {
4993             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4994                 gen9_vp9_write_compressed_element(buffer,
4995                                           3271, 128, 1);
4996                 gen9_vp9_write_compressed_element(buffer,
4997                                           3272, 128, 1);
4998             }
4999             else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
5000                 gen9_vp9_write_compressed_element(buffer,
5001                                           3271, 128, 1);
5002                 gen9_vp9_write_compressed_element(buffer,
5003                                           3272, 128, 0);
5004             }
5005             else {
5006
5007                 gen9_vp9_write_compressed_element(buffer,
5008                                           3271, 128, 0);
5009                 gen9_vp9_write_compressed_element(buffer,
5010                                           3272, 128, 0);
5011             }
5012         }
5013     }
5014
5015     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
5016 }
5017
5018
5019 static void
5020 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
5021                            struct encode_state *encode_state,
5022                            struct intel_encoder_context *encoder_context,
5023                            struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
5024 {
5025     struct intel_batchbuffer *batch = encoder_context->base.batch;
5026
5027     BEGIN_BCS_BATCH(batch, 6);
5028
5029     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
5030     OUT_BCS_BATCH(batch,
5031                   (pipe_mode_param->stream_out << 12) |
5032                   (pipe_mode_param->codec_mode << 5) |
5033                   (0 << 3) | /* disable Pic Status / Error Report */
5034                   (pipe_mode_param->stream_out << 2) |
5035                   HCP_CODEC_SELECT_ENCODE);
5036     OUT_BCS_BATCH(batch, 0);
5037     OUT_BCS_BATCH(batch, 0);
5038     OUT_BCS_BATCH(batch, (1 << 6));
5039     OUT_BCS_BATCH(batch, 0);
5040
5041     ADVANCE_BCS_BATCH(batch);
5042 }
5043
5044 static void
5045 gen9_vp9_add_surface_state(VADriverContextP ctx,
5046                        struct encode_state *encode_state,
5047                        struct intel_encoder_context *encoder_context,
5048                        hcp_surface_state *hcp_state)
5049 {
5050     struct intel_batchbuffer *batch = encoder_context->base.batch;
5051     if (!hcp_state)
5052         return;
5053
5054     BEGIN_BCS_BATCH(batch, 3);
5055     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
5056     OUT_BCS_BATCH(batch,
5057                   (hcp_state->dw1.surface_id << 28) |
5058                   (hcp_state->dw1.surface_pitch - 1)
5059                  );
5060     OUT_BCS_BATCH(batch,
5061                   (hcp_state->dw2.surface_format << 28) |
5062                   (hcp_state->dw2.y_cb_offset)
5063                  );
5064     ADVANCE_BCS_BATCH(batch);
5065 }
5066
5067 static void
5068 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
5069                                  struct encode_state *encode_state,
5070                                  struct intel_encoder_context *encoder_context)
5071 {
5072     struct intel_batchbuffer *batch = encoder_context->base.batch;
5073     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5074     struct gen9_vp9_state *vp9_state;
5075     unsigned int i;
5076     struct object_surface *obj_surface;
5077
5078     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5079
5080     if (!vp9_state || !vp9_state->pic_param)
5081          return;
5082
5083
5084     BEGIN_BCS_BATCH(batch, 104);
5085
5086     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
5087
5088     obj_surface = encode_state->reconstructed_object;
5089
5090     /* reconstructed obj_surface is already checked. So this is skipped */
5091     /* DW 1..3 decoded surface */
5092     OUT_RELOC64(batch,
5093                 obj_surface->bo,
5094                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5095                 0);
5096     OUT_BCS_BATCH(batch, 0);
5097
5098     /* DW 4..6 deblocking line */
5099     OUT_RELOC64(batch,
5100                 pak_context->res_deblocking_filter_line_buffer.bo,
5101                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5102                 0);
5103     OUT_BCS_BATCH(batch, 0);
5104
5105     /* DW 7..9 deblocking tile line */
5106     OUT_RELOC64(batch,
5107                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
5108                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5109                 0);
5110     OUT_BCS_BATCH(batch, 0);
5111
5112     /* DW 10..12 deblocking tile col */
5113     OUT_RELOC64(batch,
5114                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
5115                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5116                 0);
5117     OUT_BCS_BATCH(batch, 0);
5118
5119     /* DW 13..15 metadata line */
5120     OUT_RELOC64(batch,
5121                 pak_context->res_metadata_line_buffer.bo,
5122                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5123                 0);
5124     OUT_BCS_BATCH(batch, 0);
5125
5126     /* DW 16..18 metadata tile line */
5127     OUT_RELOC64(batch,
5128                 pak_context->res_metadata_tile_line_buffer.bo,
5129                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5130                 0);
5131     OUT_BCS_BATCH(batch, 0);
5132
5133     /* DW 19..21 metadata tile col */
5134     OUT_RELOC64(batch,
5135                 pak_context->res_metadata_tile_col_buffer.bo,
5136                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5137                 0);
5138     OUT_BCS_BATCH(batch, 0);
5139
5140     /* DW 22..30 SAO is not used for VP9 */
5141     OUT_BCS_BATCH(batch, 0);
5142     OUT_BCS_BATCH(batch, 0);
5143     OUT_BCS_BATCH(batch, 0);
5144     OUT_BCS_BATCH(batch, 0);
5145     OUT_BCS_BATCH(batch, 0);
5146     OUT_BCS_BATCH(batch, 0);
5147     OUT_BCS_BATCH(batch, 0);
5148     OUT_BCS_BATCH(batch, 0);
5149     OUT_BCS_BATCH(batch, 0);
5150
5151     /* DW 31..33 Current Motion vector temporal buffer */
5152     OUT_RELOC64(batch,
5153                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
5154                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5155                 0);
5156     OUT_BCS_BATCH(batch, 0);
5157
5158     /* DW 34..36 Not used */
5159     OUT_BCS_BATCH(batch, 0);
5160     OUT_BCS_BATCH(batch, 0);
5161     OUT_BCS_BATCH(batch, 0);
5162
5163     /* Only the first three reference_frame is used for VP9 */
5164     /* DW 37..52 for reference_frame */
5165     i = 0;
5166     if (vp9_state->picture_coding_type) {
5167         for (i = 0; i < 3; i++) {
5168
5169             if (pak_context->reference_surfaces[i].bo) {
5170                 OUT_RELOC64(batch,
5171                             pak_context->reference_surfaces[i].bo,
5172                             I915_GEM_DOMAIN_INSTRUCTION, 0,
5173                             0);
5174             } else {
5175                 OUT_BCS_BATCH(batch, 0);
5176                 OUT_BCS_BATCH(batch, 0);
5177             }
5178         }
5179     }
5180
5181     for (; i < 8; i++) {
5182         OUT_BCS_BATCH(batch, 0);
5183         OUT_BCS_BATCH(batch, 0);
5184     }
5185
5186     OUT_BCS_BATCH(batch, 0);
5187
5188     /* DW 54..56 for source input */
5189     OUT_RELOC64(batch,
5190                 pak_context->uncompressed_picture_source.bo,
5191                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5192                 0);
5193     OUT_BCS_BATCH(batch, 0);
5194
5195     /* DW 57..59 StreamOut is not used */
5196     OUT_BCS_BATCH(batch, 0);
5197     OUT_BCS_BATCH(batch, 0);
5198     OUT_BCS_BATCH(batch, 0);
5199
5200     /* DW 60..62. Not used for encoder */
5201     OUT_BCS_BATCH(batch, 0);
5202     OUT_BCS_BATCH(batch, 0);
5203     OUT_BCS_BATCH(batch, 0);
5204
5205     /* DW 63..65. ILDB Not used for encoder */
5206     OUT_BCS_BATCH(batch, 0);
5207     OUT_BCS_BATCH(batch, 0);
5208     OUT_BCS_BATCH(batch, 0);
5209
5210     /* DW 66..81 For the collocated motion vector temporal buffer */
5211     if (vp9_state->picture_coding_type) {
5212         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
5213         OUT_RELOC64(batch,
5214                 pak_context->res_mv_temporal_buffer[prev_index].bo,
5215                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5216                 0);
5217     } else {
5218         OUT_BCS_BATCH(batch, 0);
5219         OUT_BCS_BATCH(batch, 0);
5220     }
5221
5222     for (i = 1; i < 8; i++) {
5223         OUT_BCS_BATCH(batch, 0);
5224         OUT_BCS_BATCH(batch, 0);
5225     }
5226     OUT_BCS_BATCH(batch, 0);
5227
5228     /* DW 83..85 VP9 prob buffer */
5229     OUT_RELOC64(batch,
5230                 pak_context->res_prob_buffer.bo,
5231                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5232                 0);
5233
5234     OUT_BCS_BATCH(batch, 0);
5235
5236     /* DW 86..88 Segment id buffer */
5237     if (pak_context->res_segmentid_buffer.bo) {
5238         OUT_RELOC64(batch,
5239                     pak_context->res_segmentid_buffer.bo,
5240                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5241                     0);
5242     } else {
5243         OUT_BCS_BATCH(batch, 0);
5244         OUT_BCS_BATCH(batch, 0);
5245     }
5246     OUT_BCS_BATCH(batch, 0);
5247
5248     /* DW 89..91 HVD line rowstore buffer */
5249     OUT_RELOC64(batch,
5250                 pak_context->res_hvd_line_buffer.bo,
5251                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5252                 0);
5253     OUT_BCS_BATCH(batch, 0);
5254
5255     /* DW 92..94 HVD tile line rowstore buffer */
5256     OUT_RELOC64(batch,
5257                 pak_context->res_hvd_tile_line_buffer.bo,
5258                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5259                 0);
5260     OUT_BCS_BATCH(batch, 0);
5261
5262     /* DW 95..97 SAO streamout. Not used for VP9 */
5263     OUT_BCS_BATCH(batch, 0);
5264     OUT_BCS_BATCH(batch, 0);
5265     OUT_BCS_BATCH(batch, 0);
5266
5267     /* reserved for KBL. 98..100 */
5268     OUT_BCS_BATCH(batch, 0);
5269     OUT_BCS_BATCH(batch, 0);
5270     OUT_BCS_BATCH(batch, 0);
5271
5272     /* 101..103 */
5273     OUT_BCS_BATCH(batch, 0);
5274     OUT_BCS_BATCH(batch, 0);
5275     OUT_BCS_BATCH(batch, 0);
5276
5277     ADVANCE_BCS_BATCH(batch);
5278 }
5279
5280 static void
5281 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
5282                                      struct encode_state *encode_state,
5283                                      struct intel_encoder_context *encoder_context)
5284 {
5285     struct intel_batchbuffer *batch = encoder_context->base.batch;
5286     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5287     struct gen9_vp9_state *vp9_state;
5288
5289     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5290
5291     /* to do */
5292     BEGIN_BCS_BATCH(batch, 29);
5293
5294     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
5295
5296     /* indirect bitstream object base */
5297     OUT_BCS_BATCH(batch, 0);
5298     OUT_BCS_BATCH(batch, 0);
5299     OUT_BCS_BATCH(batch, 0);
5300     /* the upper bound of indirect bitstream object */
5301     OUT_BCS_BATCH(batch, 0);
5302     OUT_BCS_BATCH(batch, 0);
5303
5304     /* DW 6: Indirect CU object base address */
5305     OUT_RELOC64(batch,
5306                 pak_context->res_mb_code_surface.bo,
5307                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
5308                 vp9_state->mb_data_offset);
5309     /* default attribute */
5310     OUT_BCS_BATCH(batch, 0);
5311
5312     /* DW 9..11, PAK-BSE */
5313     OUT_RELOC64(batch,
5314                   pak_context->indirect_pak_bse_object.bo,
5315                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5316                   pak_context->indirect_pak_bse_object.offset);
5317     OUT_BCS_BATCH(batch, 0);
5318
5319     /* DW 12..13 upper bound */
5320     OUT_RELOC64(batch,
5321                   pak_context->indirect_pak_bse_object.bo,
5322                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5323                   pak_context->indirect_pak_bse_object.end_offset);
5324
5325     /* DW 14..16 compressed header buffer */
5326     OUT_RELOC64(batch,
5327                 pak_context->res_compressed_input_buffer.bo,
5328                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5329                 0);
5330     OUT_BCS_BATCH(batch, 0);
5331
5332     /* DW 17..19 prob counter streamout */
5333     OUT_RELOC64(batch,
5334                 pak_context->res_prob_counter_buffer.bo,
5335                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5336                 0);
5337     OUT_BCS_BATCH(batch, 0);
5338
5339     /* DW 20..22 prob delta streamin */
5340     OUT_RELOC64(batch,
5341                 pak_context->res_prob_delta_buffer.bo,
5342                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5343                 0);
5344     OUT_BCS_BATCH(batch, 0);
5345
5346     /* DW 23..25 Tile record streamout */
5347     OUT_RELOC64(batch,
5348                 pak_context->res_tile_record_streamout_buffer.bo,
5349                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5350                 0);
5351     OUT_BCS_BATCH(batch, 0);
5352
5353     /* DW 26..28 CU record streamout */
5354     OUT_RELOC64(batch,
5355                 pak_context->res_cu_stat_streamout_buffer.bo,
5356                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5357                 0);
5358     OUT_BCS_BATCH(batch, 0);
5359
5360     ADVANCE_BCS_BATCH(batch);
5361 }
5362
5363 static void
5364 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5365                            struct encode_state *encode_state,
5366                            struct intel_encoder_context *encoder_context,
5367                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5368 {
5369     struct intel_batchbuffer *batch = encoder_context->base.batch;
5370     uint32_t batch_value, tmp;
5371     VAEncPictureParameterBufferVP9 *pic_param;
5372
5373     if (!encode_state->pic_param_ext ||
5374         !encode_state->pic_param_ext->buffer) {
5375         return;
5376     }
5377
5378     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5379
5380     batch_value = seg_param->seg_flags.bits.segment_reference;
5381     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5382         pic_param->pic_flags.bits.intra_only)
5383         batch_value = 0;
5384
5385     BEGIN_BCS_BATCH(batch, 8);
5386
5387     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5388     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5389     OUT_BCS_BATCH(batch,
5390                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5391                   (batch_value << 1) |
5392                   (seg_param->seg_flags.bits.segment_reference_skipped <<0)
5393                  );
5394
5395     /* DW 3..6 is not used for encoder */
5396     OUT_BCS_BATCH(batch, 0);
5397     OUT_BCS_BATCH(batch, 0);
5398     OUT_BCS_BATCH(batch, 0);
5399     OUT_BCS_BATCH(batch, 0);
5400
5401     /* DW 7 Mode */
5402     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5403     batch_value = tmp;
5404     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5405     batch_value |= (tmp << 16);
5406     OUT_BCS_BATCH(batch, batch_value);
5407
5408     ADVANCE_BCS_BATCH(batch);
5409
5410 }
5411
5412 static void
5413 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5414                                  struct intel_encoder_context *encoder_context,
5415                                  struct i965_gpe_resource *obj_batch_buffer)
5416 {
5417     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5418     struct gen9_vp9_state *vp9_state;
5419     int uncompressed_header_length;
5420     unsigned int *cmd_ptr;
5421     unsigned int dw_length, bits_in_last_dw;
5422
5423     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5424
5425     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5426         return;
5427
5428     uncompressed_header_length = vp9_state->header_length;
5429     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5430
5431     if (!cmd_ptr)
5432         return;
5433
5434     bits_in_last_dw = uncompressed_header_length % 4;
5435     bits_in_last_dw *= 8;
5436
5437     if (bits_in_last_dw == 0)
5438         bits_in_last_dw = 32;
5439
5440     /* get the DWORD length of the inserted_data */
5441     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5442     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5443
5444     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5445                   (0 << 16) | /* the start offset in first DW */
5446                   (0 << 15) |
5447                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5448                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5449                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5450                   (1 << 2) |  /* last header flag. */
5451                   (0 << 1));
5452     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5453
5454     cmd_ptr += dw_length;
5455
5456     *cmd_ptr++ = MI_NOOP;
5457     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5458     i965_unmap_gpe_resource(obj_batch_buffer);
5459 }
5460
5461 static void
5462 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5463                            struct encode_state *encode_state,
5464                            struct intel_encoder_context *encoder_context)
5465 {
5466     struct intel_batchbuffer *batch = encoder_context->base.batch;
5467     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5468     struct object_surface *obj_surface;
5469     VAEncPictureParameterBufferVP9 *pic_param;
5470     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5471     struct gen9_vp9_state *vp9_state;
5472     struct gen9_surface_vp9 *vp9_priv_surface;
5473     int i;
5474     struct gen9_hcpe_pipe_mode_select_param mode_param;
5475     hcp_surface_state hcp_surface;
5476     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5477     int segment_count;
5478
5479     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5480
5481     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5482         return;
5483
5484     pic_param = vp9_state->pic_param;
5485     seg_param = vp9_state->segment_param;
5486
5487     if (vp9_state->curr_pak_pass == 0)
5488     {
5489         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5490                            &pak_context->res_pak_uncompressed_input_buffer);
5491
5492         // Check if driver already programmed pic state as part of BRC update kernel programming.
5493         if (!vp9_state->brc_enabled)
5494         {
5495             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5496                  encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5497         }
5498     }
5499
5500     if (vp9_state->curr_pak_pass == 0)
5501     {
5502         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5503     }
5504
5505     {
5506         /* copy the frame_context[frame_idx] into curr_frame_context */
5507         memcpy(&vp9_state->vp9_current_fc,
5508                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5509                sizeof(FRAME_CONTEXT));
5510         {
5511             uint8_t *prob_ptr;
5512
5513             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5514
5515             if (!prob_ptr)
5516                 return;
5517
5518             /* copy the current fc to vp9_prob buffer */
5519             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5520             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5521                  pic_param->pic_flags.bits.intra_only) {
5522                  FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5523
5524                  memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5525                         sizeof(vp9_kf_partition_probs));
5526                  memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5527                         sizeof(vp9_kf_uv_mode_prob));
5528             }
5529             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5530         }
5531     }
5532
5533     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5534         /* read image status and insert the conditional end cmd */
5535         /* image ctrl/status is already accessed */
5536         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5537         struct vp9_encode_status_buffer_internal *status_buffer;
5538
5539         status_buffer = &vp9_state->status_buffer;
5540         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5541         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5542         mi_cond_end.bo = status_buffer->bo;
5543         mi_cond_end.compare_data = 0;
5544         mi_cond_end.compare_mask_mode_disabled = 1;
5545         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5546                                                  &mi_cond_end);
5547     }
5548
5549     mode_param.codec_mode = 1;
5550     mode_param.stream_out = 0;
5551     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5552
5553     /* reconstructed surface */
5554     memset(&hcp_surface, 0, sizeof(hcp_surface));
5555     obj_surface = encode_state->reconstructed_object;
5556     hcp_surface.dw1.surface_id = 0;
5557     hcp_surface.dw1.surface_pitch = obj_surface->width;
5558     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5559     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5560     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5561                                &hcp_surface);
5562
5563     /* Input surface */
5564     if (vp9_state->dys_in_use &&
5565         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5566          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5567         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5568         obj_surface = vp9_priv_surface->dys_surface_obj;
5569     } else {
5570         obj_surface = encode_state->input_yuv_object;
5571     }
5572
5573     hcp_surface.dw1.surface_id = 1;
5574     hcp_surface.dw1.surface_pitch = obj_surface->width;
5575     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5576     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5577     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5578                                &hcp_surface);
5579
5580     if (vp9_state->picture_coding_type) {
5581         /* Add surface for last */
5582         if (vp9_state->last_ref_obj) {
5583             obj_surface = vp9_state->last_ref_obj;
5584             hcp_surface.dw1.surface_id = 2;
5585             hcp_surface.dw1.surface_pitch = obj_surface->width;
5586             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5587             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5588             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5589                                &hcp_surface);
5590         }
5591         if (vp9_state->golden_ref_obj) {
5592             obj_surface = vp9_state->golden_ref_obj;
5593             hcp_surface.dw1.surface_id = 3;
5594             hcp_surface.dw1.surface_pitch = obj_surface->width;
5595             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5596             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5597             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5598                                &hcp_surface);
5599         }
5600         if (vp9_state->alt_ref_obj) {
5601             obj_surface = vp9_state->alt_ref_obj;
5602             hcp_surface.dw1.surface_id = 4;
5603             hcp_surface.dw1.surface_pitch = obj_surface->width;
5604             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5605             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5606             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5607                                &hcp_surface);
5608         }
5609     }
5610
5611     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5612
5613     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5614
5615     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5616     memset(&second_level_batch, 0, sizeof(second_level_batch));
5617
5618     if (vp9_state->curr_pak_pass == 0) {
5619         second_level_batch.offset = 0;
5620     } else
5621         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5622
5623     second_level_batch.is_second_level = 1;
5624     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5625
5626     gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5627
5628     if (pic_param->pic_flags.bits.segmentation_enabled &&
5629         seg_param)
5630         segment_count = 8;
5631     else {
5632         segment_count = 1;
5633         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5634         seg_param = &tmp_seg_param;
5635     }
5636     for (i = 0; i < segment_count; i++)
5637     {
5638         gen9_pak_vp9_segment_state(ctx, encode_state,
5639                                    encoder_context,
5640                                    &seg_param->seg_data[i], i);
5641     }
5642
5643     /* Insert the uncompressed header buffer */
5644     second_level_batch.is_second_level = 1;
5645     second_level_batch.offset = 0;
5646     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5647
5648     gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5649
5650     /* PAK_OBJECT */
5651     second_level_batch.is_second_level = 1;
5652     second_level_batch.offset = 0;
5653     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5654     gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5655
5656     return;
5657 }
5658
5659 static void
5660 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5661 {
5662     struct intel_batchbuffer *batch = encoder_context->base.batch;
5663     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5664     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5665     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5666     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5667     struct vp9_encode_status_buffer_internal *status_buffer;
5668     struct gen9_vp9_state *vp9_state;
5669
5670     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5671     if (!vp9_state || !pak_context || !batch)
5672         return;
5673
5674     status_buffer = &(vp9_state->status_buffer);
5675
5676     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5677     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5678
5679     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5680     mi_store_reg_mem_param.bo = status_buffer->bo;
5681     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5682     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5683     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5684
5685     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5686     mi_store_reg_mem_param.offset = 0;
5687     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5688     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5689
5690     /* Read HCP Image status */
5691     mi_store_reg_mem_param.bo = status_buffer->bo;
5692     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5693     mi_store_reg_mem_param.mmio_offset =
5694                                status_buffer->vp9_image_mask_reg_offset;
5695     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5696
5697     mi_store_reg_mem_param.bo = status_buffer->bo;
5698     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5699     mi_store_reg_mem_param.mmio_offset =
5700                                status_buffer->vp9_image_ctrl_reg_offset;
5701     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5702
5703     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5704     mi_store_reg_mem_param.offset = 4;
5705     mi_store_reg_mem_param.mmio_offset =
5706                                status_buffer->vp9_image_ctrl_reg_offset;
5707     gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5708
5709     gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5710
5711     return;
5712 }
5713
5714 static VAStatus
5715 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5716                               struct encode_state *encode_state,
5717                               struct intel_encoder_context *encoder_context)
5718 {
5719     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5720     struct object_surface *obj_surface;
5721     struct object_buffer *obj_buffer;
5722     struct i965_coded_buffer_segment *coded_buffer_segment;
5723     VAEncPictureParameterBufferVP9 *pic_param;
5724     struct gen9_vp9_state *vp9_state;
5725     dri_bo *bo;
5726     int i;
5727
5728     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5729     if (!vp9_state ||
5730         !vp9_state->pic_param)
5731         return VA_STATUS_ERROR_INVALID_PARAMETER;
5732
5733     pic_param = vp9_state->pic_param;
5734
5735     /* reconstructed surface */
5736     obj_surface = encode_state->reconstructed_object;
5737     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5738
5739     pak_context->reconstructed_object.bo = obj_surface->bo;
5740     dri_bo_reference(pak_context->reconstructed_object.bo);
5741
5742     /* set vp9 reference frames */
5743     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5744         if (pak_context->reference_surfaces[i].bo)
5745             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5746         pak_context->reference_surfaces[i].bo = NULL;
5747     }
5748
5749     /* Three reference frames are enough for VP9 */
5750     if (pic_param->pic_flags.bits.frame_type &&
5751         !pic_param->pic_flags.bits.intra_only) {
5752         for (i = 0; i < 3; i++) {
5753             obj_surface = encode_state->reference_objects[i];
5754             if (obj_surface && obj_surface->bo) {
5755                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5756                 dri_bo_reference(obj_surface->bo);
5757             }
5758         }
5759     }
5760
5761     /* input YUV surface */
5762     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5763     pak_context->uncompressed_picture_source.bo = NULL;
5764     obj_surface = encode_state->reconstructed_object;
5765     if (vp9_state->dys_in_use &&
5766         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5767          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5768         struct gen9_surface_vp9 *vp9_priv_surface =
5769             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5770         obj_surface = vp9_priv_surface->dys_surface_obj;
5771     } else
5772         obj_surface = encode_state->input_yuv_object;
5773
5774     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5775     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5776
5777     /* coded buffer */
5778     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5779     pak_context->indirect_pak_bse_object.bo = NULL;
5780     obj_buffer = encode_state->coded_buf_object;
5781     bo = obj_buffer->buffer_store->bo;
5782     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5783     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5784     pak_context->indirect_pak_bse_object.bo = bo;
5785     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5786
5787     /* set the internal flag to 0 to indicate the coded size is unknown */
5788     dri_bo_map(bo, 1);
5789     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5790     coded_buffer_segment->mapped = 0;
5791     coded_buffer_segment->codec = encoder_context->codec;
5792     dri_bo_unmap(bo);
5793
5794     return VA_STATUS_SUCCESS;
5795 }
5796
5797 static void
5798 gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
5799                           struct intel_encoder_context *encoder_context)
5800 {
5801     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5802     struct gen9_vp9_state *vp9_state;
5803
5804     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5805
5806     if (!vp9_state || !pak_context)
5807         return;
5808
5809     if (vp9_state->brc_enabled) {
5810         /* check the buffer related with BRC */
5811         vp9_state->brc_flag_check = 0;
5812         if (encode_state->seq_param_ext && encode_state->seq_param_ext->buffer) {
5813             vp9_state->brc_flag_check |= VP9_BRC_SEQ;
5814         }
5815
5816         /* Frame_rate */
5817         if (encode_state->misc_param[VAEncMiscParameterTypeFrameRate] &&
5818             encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer) {
5819             vp9_state->brc_flag_check |= VP9_BRC_FR;
5820         }
5821
5822         /* HRD */
5823         if (encode_state->misc_param[VAEncMiscParameterTypeRateControl] &&
5824             encode_state->misc_param[VAEncMiscParameterTypeRateControl]->buffer) {
5825             vp9_state->brc_flag_check |= VP9_BRC_RC;
5826         }
5827
5828         if (encode_state->misc_param[VAEncMiscParameterTypeHRD] &&
5829             encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer) {
5830             vp9_state->brc_flag_check |= VP9_BRC_HRD;
5831         }
5832
5833         /*
5834          * If user-app doesn't pass the buffer related with BRC for the first
5835          * frame, the error flag is returned.
5836          */
5837         if (vp9_state->brc_flag_check == 0 && vp9_state->first_frame) {
5838             vp9_state->brc_flag_check |= VP9_BRC_FAILURE;
5839         }
5840     }
5841     return;
5842 }
5843
5844 static void
5845 gen9_vp9_pak_context_destroy(void *context)
5846 {
5847     struct gen9_encoder_context_vp9 *pak_context = context;
5848     int i;
5849
5850     dri_bo_unreference(pak_context->reconstructed_object.bo);
5851     pak_context->reconstructed_object.bo = NULL;
5852
5853     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5854     pak_context->uncompressed_picture_source.bo = NULL;
5855
5856     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5857     pak_context->indirect_pak_bse_object.bo = NULL;
5858
5859     for (i = 0; i < 8; i++){
5860         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5861         pak_context->reference_surfaces[i].bo = NULL;
5862     }
5863
5864     /* vme & pak same the same structure, so don't free the context here */
5865 }
5866
5867 static VAStatus
5868 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5869                        VAProfile profile,
5870                        struct encode_state *encode_state,
5871                        struct intel_encoder_context *encoder_context)
5872 {
5873     struct i965_driver_data *i965 = i965_driver_data(ctx);
5874     struct intel_batchbuffer *batch = encoder_context->base.batch;
5875     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5876     VAStatus va_status;
5877     struct gen9_vp9_state *vp9_state;
5878     VAEncPictureParameterBufferVP9 *pic_param;
5879     int i;
5880
5881     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5882
5883     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5884         return VA_STATUS_ERROR_INVALID_PARAMETER;
5885
5886     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5887
5888     if (va_status != VA_STATUS_SUCCESS)
5889         return va_status;
5890
5891     if (i965->intel.has_bsd2)
5892         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5893     else
5894         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5895
5896     intel_batchbuffer_emit_mi_flush(batch);
5897
5898     BEGIN_BCS_BATCH(batch, 64);
5899     for (i = 0; i < 64; i++)
5900         OUT_BCS_BATCH(batch, MI_NOOP);
5901
5902     ADVANCE_BCS_BATCH(batch);
5903
5904     for (vp9_state->curr_pak_pass = 0;
5905          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5906          vp9_state->curr_pak_pass++) {
5907
5908         if (vp9_state->curr_pak_pass == 0) {
5909             /* Initialize the VP9 Image Ctrl reg for the first pass */
5910             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5911             struct vp9_encode_status_buffer_internal *status_buffer;
5912
5913             status_buffer = &(vp9_state->status_buffer);
5914             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5915             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5916             mi_load_reg_imm.data = 0;
5917             gen9_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5918         }
5919         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5920         gen9_vp9_read_mfc_status(ctx, encoder_context);
5921     }
5922
5923     intel_batchbuffer_end_atomic(batch);
5924     intel_batchbuffer_flush(batch);
5925
5926     pic_param = vp9_state->pic_param;
5927     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5928     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5929     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5930     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5931     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5932     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5933     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5934     vp9_state->frame_number++;
5935     vp9_state->curr_mv_temporal_index ^= 1;
5936     vp9_state->first_frame = 0;
5937
5938     return VA_STATUS_SUCCESS;
5939 }
5940
5941 Bool
5942 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5943 {
5944     struct gen9_encoder_context_vp9 *vme_context = NULL;
5945     struct gen9_vp9_state *vp9_state = NULL;
5946
5947     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5948     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5949
5950     if (!vme_context || !vp9_state) {
5951         if (vme_context)
5952             free(vme_context);
5953         if (vp9_state)
5954             free(vp9_state);
5955         return false;
5956     }
5957
5958     encoder_context->enc_priv_state = vp9_state;
5959     vme_context->enc_priv_state = vp9_state;
5960
5961     /* Initialize the features that are supported by VP9 */
5962     vme_context->hme_supported = 1;
5963     vme_context->use_hw_scoreboard = 1;
5964     vme_context->use_hw_non_stalling_scoreboard = 1;
5965
5966     vp9_state->tx_mode = TX_MODE_SELECT;
5967     vp9_state->multi_ref_qp_check = 0;
5968     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5969     vp9_state->num_pak_passes = 1;
5970     vp9_state->hme_supported = vme_context->hme_supported;
5971     vp9_state->b16xme_supported = 1;
5972
5973     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5974         encoder_context->rate_control_mode != VA_RC_CQP) {
5975         vp9_state->brc_enabled = 1;
5976         vp9_state->brc_distortion_buffer_supported = 1;
5977         vp9_state->brc_constant_buffer_supported = 1;
5978         vp9_state->num_pak_passes = 4;
5979     }
5980     vp9_state->dys_enabled = 1; /* this is supported by default */
5981     vp9_state->first_frame = 1;
5982
5983     /* the definition of status buffer offset for VP9 */
5984     {
5985         struct vp9_encode_status_buffer_internal *status_buffer;
5986
5987         status_buffer = &vp9_state->status_buffer;
5988         memset(status_buffer, 0,
5989                sizeof(struct vp9_encode_status_buffer_internal));
5990
5991         status_buffer->bs_byte_count_offset = 2048;
5992         status_buffer->image_status_mask_offset = 2052;
5993         status_buffer->image_status_ctrl_offset = 2056;
5994         status_buffer->media_index_offset       = 2060;
5995
5996         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5997         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5998         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5999     }
6000
6001     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
6002
6003     encoder_context->vme_context = vme_context;
6004     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
6005     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
6006
6007     return true;
6008 }
6009
6010 Bool
6011 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
6012 {
6013     /* VME & PAK share the same context */
6014     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
6015
6016     if (!pak_context)
6017         return false;
6018
6019     encoder_context->mfc_context = pak_context;
6020     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
6021     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
6022     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
6023
6024     return true;
6025 }
6026
6027 VAStatus
6028 gen9_vp9_get_coded_status(VADriverContextP ctx,
6029                           char *buffer,
6030                           struct hw_context *hw_context)
6031 {
6032     struct intel_encoder_context *encoder_context =
6033                       (struct intel_encoder_context *)hw_context;
6034     struct gen9_vp9_state *vp9_state = NULL;
6035     struct vp9_encode_status_buffer_internal *status_buffer;
6036     struct i965_coded_buffer_segment *coded_buf_seg;
6037     unsigned int *buf_ptr;
6038
6039     if (!encoder_context || !buffer)
6040         return VA_STATUS_ERROR_INVALID_BUFFER;
6041
6042     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
6043     coded_buf_seg = (struct i965_coded_buffer_segment *) buffer;
6044
6045     if (!vp9_state)
6046         return VA_STATUS_ERROR_INVALID_BUFFER;
6047
6048     status_buffer = &vp9_state->status_buffer;
6049
6050     buf_ptr = (unsigned int *)(buffer + status_buffer->bs_byte_count_offset);
6051     coded_buf_seg->base.buf = buffer + I965_CODEDBUFFER_HEADER_SIZE;
6052     coded_buf_seg->base.next = NULL;
6053
6054     /* the stream size is writen into the bs_byte_count_offset address of buffer */
6055     coded_buf_seg->base.size = *buf_ptr;
6056
6057     coded_buf_seg->mapped = 1;
6058
6059     /* One VACodedBufferSegment for VP9 will be added later.
6060      * It will be linked to the next element of coded_buf_seg->base.next
6061      */
6062
6063     return VA_STATUS_SUCCESS;
6064 }